Re: [RFC v2 05/14] lib: remove redundant newline from logs

2023-12-11 Thread Mattias Rönnblom

On 2023-12-08 15:59, David Marchand wrote:

Fix places where two newline characters may be logged.

Cc: sta...@dpdk.org

Signed-off-by: David Marchand 
---
Changes since RFC v1:
- split fixes on direct calls to printf or RTE_LOG in a previous patch,

---
  drivers/crypto/ipsec_mb/ipsec_mb_ops.c  |   2 +-
  lib/bbdev/rte_bbdev.c   |   6 +-
  lib/cfgfile/rte_cfgfile.c   |  14 ++--
  lib/compressdev/rte_compressdev_pmd.c   |   4 +-
  lib/cryptodev/rte_cryptodev.c   |   2 +-
  lib/dispatcher/rte_dispatcher.c |  12 +--
  lib/dmadev/rte_dmadev.c |   2 +-
  lib/eal/windows/eal_memory.c|   2 +-
  lib/eventdev/eventdev_pmd.h |   6 +-
  lib/eventdev/rte_event_crypto_adapter.c |  12 +--
  lib/eventdev/rte_event_dma_adapter.c|  14 ++--
  lib/eventdev/rte_event_eth_rx_adapter.c |  28 +++
  lib/eventdev/rte_event_eth_tx_adapter.c |   2 +-
  lib/eventdev/rte_event_timer_adapter.c  |   4 +-
  lib/eventdev/rte_eventdev.c |   4 +-
  lib/metrics/rte_metrics_telemetry.c |   2 +-
  lib/mldev/rte_mldev.c   | 102 
  lib/net/rte_net_crc.c   |   6 +-
  lib/node/ethdev_rx.c|   4 +-
  lib/node/ip4_lookup.c   |   2 +-
  lib/node/ip6_lookup.c   |   2 +-
  lib/node/kernel_rx.c|   8 +-
  lib/node/kernel_tx.c|   4 +-
  lib/rcu/rte_rcu_qsbr.c  |   4 +-
  lib/rcu/rte_rcu_qsbr.h  |   8 +-
  lib/stack/rte_stack.c   |   8 +-
  lib/vhost/vhost_crypto.c|   6 +-
  27 files changed, 135 insertions(+), 135 deletions(-)



Acked-by: Mattias Rönnblom 



Re: [PATCH v2 2/2] net/cnxk: dump Rx descriptor info to file

2023-12-11 Thread Jerin Jacob
On Thu, Dec 7, 2023 at 9:13 PM Rakesh Kudurumalla
 wrote:
>
> Add support for eth_rx_descriptor_dump for cn9k and cn10k.
> This patch dumps the contents of the received packet descriptor from the CQ
> to a file for debugging.
>
> Signed-off-by: Rakesh Kudurumalla 

Series applied to dpdk-next-net-mrvl/for-main. Thanks


[PATCH] examples/ipsec-secgw: fix cryptodev to SA mapping

2023-12-11 Thread Radu Nicolau
There are use cases where a SA should be able to use different cryptodevs on
different lcores, for example there can be cryptodevs with just 1 qp per VF.
For this purpose this patch relaxes the check in create lookaside session 
function.
Also add a check to verify that a CQP is available for the current lcore.

Fixes: a8ade12123c3 ("examples/ipsec-secgw: create lookaside sessions at init")
Cc: sta...@dpdk.org
Cc: vfia...@marvell.com

Signed-off-by: Radu Nicolau 
---
 examples/ipsec-secgw/ipsec.c | 13 -
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/examples/ipsec-secgw/ipsec.c b/examples/ipsec-secgw/ipsec.c
index f5cec4a928..593eab4e73 100644
--- a/examples/ipsec-secgw/ipsec.c
+++ b/examples/ipsec-secgw/ipsec.c
@@ -288,10 +288,9 @@ create_lookaside_session(struct ipsec_ctx 
*ipsec_ctx_lcore[],
if (cdev_id == RTE_CRYPTO_MAX_DEVS)
cdev_id = ipsec_ctx->tbl[cdev_id_qp].id;
else if (cdev_id != ipsec_ctx->tbl[cdev_id_qp].id) {
-   RTE_LOG(ERR, IPSEC,
-   "SA mapping to multiple cryptodevs is "
-   "not supported!");
-   return -EINVAL;
+   RTE_LOG(WARNING, IPSEC,
+   "SA mapped to multiple cryptodevs for SPI %d\n",
+   sa->spi);
}
 
/* Store per core queue pair information */
@@ -908,7 +907,11 @@ ipsec_enqueue(ipsec_xform_fn xform_func, struct ipsec_ctx 
*ipsec_ctx,
continue;
}
 
-   enqueue_cop(sa->cqp[ipsec_ctx->lcore_id], &priv->cop);
+   if (sa->cqp[ipsec_ctx->lcore_id])
+   enqueue_cop(sa->cqp[ipsec_ctx->lcore_id], &priv->cop);
+   else
+   RTE_LOG(ERR, IPSEC, "No CQP available for lcore %d\n",
+   ipsec_ctx->lcore_id);
}
 }
 
-- 
2.25.1



[RFC v3 03/12] argparse: support verify argument config

2023-12-11 Thread Chengwen Feng
This commit adds support for verifying the argument configuration.

Signed-off-by: Chengwen Feng 
---
 lib/argparse/rte_argparse.c | 307 +++-
 1 file changed, 306 insertions(+), 1 deletion(-)

diff --git a/lib/argparse/rte_argparse.c b/lib/argparse/rte_argparse.c
index bf14c56858..6fdcf4f07b 100644
--- a/lib/argparse/rte_argparse.c
+++ b/lib/argparse/rte_argparse.c
@@ -2,13 +2,318 @@
  * Copyright(c) 2023 HiSilicon Limited
  */
 
+#include 
+#include 
+#include 
+
+#include 
+
 #include "rte_argparse.h"
 
+RTE_LOG_REGISTER_DEFAULT(rte_argparse_logtype, INFO);
+#define ARGPARSE_LOG(level, ...) \
+   rte_log(RTE_LOG_ ## level, rte_argparse_logtype, RTE_FMT("argparse: " \
+   RTE_FMT_HEAD(__VA_ARGS__,) "\n", RTE_FMT_TAIL(__VA_ARGS__,)))
+
+#define ARG_ATTR_HAS_VAL_MASK  RTE_GENMASK64(1, 0)
+#define ARG_ATTR_VAL_TYPE_MASK RTE_GENMASK64(9, 2)
+#define ARG_ATTR_SUPPORT_MULTI_MASKRTE_BIT64(10)
+#define ARG_ATTR_FLAG_PARSED_MASK  RTE_BIT64(63)
+
+static inline bool
+is_arg_optional(const struct rte_argparse_arg *arg)
+{
+   return arg->name_long[0] == '-';
+}
+
+static inline bool
+is_arg_positional(const struct rte_argparse_arg *arg)
+{
+   return arg->name_long[0] != '-';
+}
+
+static inline uint32_t
+arg_attr_has_val(const struct rte_argparse_arg *arg)
+{
+   return RTE_FIELD_GET64(ARG_ATTR_HAS_VAL_MASK, arg->flags);
+}
+
+static inline uint32_t
+arg_attr_val_type(const struct rte_argparse_arg *arg)
+{
+   return RTE_FIELD_GET64(ARG_ATTR_VAL_TYPE_MASK, arg->flags);
+}
+
+static inline bool
+arg_attr_flag_multi(const struct rte_argparse_arg *arg)
+{
+   return RTE_FIELD_GET64(ARG_ATTR_SUPPORT_MULTI_MASK, arg->flags);
+}
+
+static inline uint32_t
+arg_attr_unused_bits(const struct rte_argparse_arg *arg)
+{
+#define USED_BIT_MASK  (ARG_ATTR_HAS_VAL_MASK | ARG_ATTR_VAL_TYPE_MASK | \
+ARG_ATTR_SUPPORT_MULTI_MASK)
+   return arg->flags & ~USED_BIT_MASK;
+}
+
+static int
+verify_arg_name(const struct rte_argparse_arg *arg)
+{
+   if (is_arg_optional(arg)) {
+   if (strlen(arg->name_long) <= 3) {
+   ARGPARSE_LOG(ERR, "optional long name %s too short!", 
arg->name_long);
+   return -EINVAL;
+   }
+   if (arg->name_long[1] != '-') {
+   ARGPARSE_LOG(ERR, "optional long name %s must only 
start with '--'",
+arg->name_long);
+   return -EINVAL;
+   }
+   if (arg->name_long[2] == '-') {
+   ARGPARSE_LOG(ERR, "optional long name %s should not 
start with '---'",
+arg->name_long);
+   return -EINVAL;
+   }
+   }
+
+   if (arg->name_short == NULL)
+   return 0;
+
+   if (!is_arg_optional(arg)) {
+   ARGPARSE_LOG(ERR, "short name %s corresponding long name must 
be optional!",
+arg->name_short);
+   return -EINVAL;
+   }
+
+   if (strlen(arg->name_short) != 2 || arg->name_short[0] != '-' ||
+   arg->name_short[1] == '-') {
+   ARGPARSE_LOG(ERR, "short name %s must start with a hyphen (-) 
followed by an English letter",
+arg->name_short);
+   return -EINVAL;
+   }
+
+   return 0;
+}
+
+static int
+verify_arg_help(const struct rte_argparse_arg *arg)
+{
+   if (arg->help == NULL) {
+   ARGPARSE_LOG(ERR, "argument %s must have help info!", 
arg->name_long);
+   return -EINVAL;
+   }
+
+   return 0;
+}
+
+static int
+verify_arg_has_val(const struct rte_argparse_arg *arg)
+{
+   uint32_t has_val = arg_attr_has_val(arg);
+
+   if (is_arg_positional(arg)) {
+   if (has_val == RTE_ARGPARSE_ARG_REQUIRED_VALUE)
+   return 0;
+   ARGPARSE_LOG(ERR, "argument %s is positional, should has zero 
or required-val!",
+arg->name_long);
+   return -EINVAL;
+   }
+
+   if (has_val == 0) {
+   ARGPARSE_LOG(ERR, "argument %s is optional, has-val config 
wrong!",
+arg->name_long);
+   return -EINVAL;
+   }
+
+   return 0;
+}
+
+static int
+verify_arg_saver(const struct rte_argparse *obj, uint32_t index)
+{
+   uint32_t cmp_max = RTE_FIELD_GET64(ARG_ATTR_VAL_TYPE_MASK, 
RTE_ARGPARSE_ARG_VALUE_MAX);
+   const struct rte_argparse_arg *arg = &obj->args[index];
+   uint32_t val_type = arg_attr_val_type(arg);
+   uint32_t has_val = arg_attr_has_val(arg);
+
+   if (arg->val_saver == NULL) {
+   if (val_type != 0) {
+   ARGPARSE_LOG(ERR, "argument %s parse by callback, 
val-type must be zero!",
+arg->name_long);
+   return -EINVAL;
+   

[RFC v3 07/12] argparse: provide parsing known type API

2023-12-11 Thread Chengwen Feng
Provide an API that parses a value from the input string based on the
value type. This API can be used in a user callback when parsing
strings with the argparse or kvargs library.

Signed-off-by: Chengwen Feng 
---
 lib/argparse/rte_argparse.c | 19 +++
 lib/argparse/rte_argparse.h | 19 +++
 lib/argparse/version.map|  1 +
 3 files changed, 39 insertions(+)

diff --git a/lib/argparse/rte_argparse.c b/lib/argparse/rte_argparse.c
index cc5493c6be..6b82f58eaf 100644
--- a/lib/argparse/rte_argparse.c
+++ b/lib/argparse/rte_argparse.c
@@ -600,3 +600,22 @@ rte_argparse_parse(struct rte_argparse *obj, int argc, 
char **argv)
exit(ret);
return ret;
 }
+
+int
+rte_argparse_parse_type(const char *str, uint64_t val_type, void *val)
+{
+   uint32_t cmp_max = RTE_FIELD_GET64(ARG_ATTR_VAL_TYPE_MASK, 
RTE_ARGPARSE_ARG_VALUE_MAX);
+   struct rte_argparse_arg arg = {
+   .name_long = str,
+   .name_short = NULL,
+   .val_saver = val,
+   .val_set = NULL,
+   .flags = val_type,
+   };
+   uint32_t value_type = arg_attr_val_type(&arg);
+
+   if (value_type == 0 || value_type >= cmp_max)
+   return -EINVAL;
+
+   return parse_arg_autosave(&arg, str);
+}
diff --git a/lib/argparse/rte_argparse.h b/lib/argparse/rte_argparse.h
index 72eea7cf87..5e40431e5b 100644
--- a/lib/argparse/rte_argparse.h
+++ b/lib/argparse/rte_argparse.h
@@ -184,6 +184,25 @@ struct rte_argparse {
 __rte_experimental
 int rte_argparse_parse(struct rte_argparse *obj, int argc, char **argv);
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Parse the value from the input string based on the value type.
+ *
+ * @param str
+ *   Input string.
+ * @param val_type
+ *   The value type, @see RTE_ARGPARSE_ARG_VALUE_INT or other type.
+ * @param val
+ *   Saver for the value.
+ *
+ * @return
+ *   0 on success. Otherwise negative value is returned.
+ */
+__rte_experimental
+int rte_argparse_parse_type(const char *str, uint64_t val_type, void *val);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/argparse/version.map b/lib/argparse/version.map
index 1c176f69e9..9b68464600 100644
--- a/lib/argparse/version.map
+++ b/lib/argparse/version.map
@@ -2,6 +2,7 @@ EXPERIMENTAL {
global:
 
rte_argparse_parse;
+   rte_argparse_parse_type;
 
local: *;
 };
-- 
2.17.1



[RFC v3 00/12] add argparse library

2023-12-11 Thread Chengwen Feng
Introduce the argparse library (inspired by the thread [1]).
Compared with getopt, it makes it easy to write user-friendly
command-line programs.

Note: the [1/6] commit contains usage examples.

[1] 
https://patchwork.dpdk.org/project/dpdk/patch/20231105054539.22303-2-fengcheng...@huawei.com/

Chengwen Feng (12):
  eal: introduce more macro for bit definition
  argparse: add argparse library
  argparse: support verify argument config
  test/argparse: add verify argument config test
  argparse: support parse parameters
  test/argparse: add parse parameters test
  argparse: provide parsing known type API
  test/argparse: add parse type test
  argparse: support parse unsigned base type
  test/argparse: add parse unsigned base type test
  argparse: pretty help info
  examples/dma: replace getopt with argparse

---
v3: refine the implementation, standardize the new macros for bit
definition, add reserved field, extend flags to 64bit, support
u8/u16/u32/u64, add rte_argparse_parse_type() API, pretty help info,
fix dmafwd bug, also address some of Stephen's comments.
v2: refine the definition, add implementation code, add examples which
address Stephen's comments.

 app/test/meson.build   |   1 +
 app/test/test_argparse.c   | 835 +
 doc/api/doxy-api-index.md  |   1 +
 doc/api/doxy-api.conf.in   |   1 +
 doc/guides/prog_guide/argparse_lib.rst | 141 +
 doc/guides/prog_guide/index.rst|   1 +
 examples/dma/dmafwd.c  | 279 -
 examples/dma/meson.build   |   2 +-
 lib/argparse/meson.build   |   7 +
 lib/argparse/rte_argparse.c| 782 +++
 lib/argparse/rte_argparse.h| 218 +++
 lib/argparse/version.map   |   8 +
 lib/eal/include/rte_bitops.h   |  64 ++
 lib/meson.build|   1 +
 14 files changed, 2187 insertions(+), 154 deletions(-)
 create mode 100644 app/test/test_argparse.c
 create mode 100644 doc/guides/prog_guide/argparse_lib.rst
 create mode 100644 lib/argparse/meson.build
 create mode 100644 lib/argparse/rte_argparse.c
 create mode 100644 lib/argparse/rte_argparse.h
 create mode 100644 lib/argparse/version.map

-- 
2.17.1



[RFC v3 05/12] argparse: support parse parameters

2023-12-11 Thread Chengwen Feng
This commit adds support for parsing the parameters described by [argc, argv].

Signed-off-by: Chengwen Feng 
---
 lib/argparse/rte_argparse.c | 289 +++-
 1 file changed, 286 insertions(+), 3 deletions(-)

diff --git a/lib/argparse/rte_argparse.c b/lib/argparse/rte_argparse.c
index 6fdcf4f07b..cc5493c6be 100644
--- a/lib/argparse/rte_argparse.c
+++ b/lib/argparse/rte_argparse.c
@@ -298,18 +298,301 @@ verify_argparse(const struct rte_argparse *obj)
return 0;
 }
 
+static uint32_t
+calc_position_count(const struct rte_argparse *obj)
+{
+   const struct rte_argparse_arg *arg;
+   uint32_t count = 0;
+   uint32_t i;
+
+   for (i = 0; /* NULL */; i++) {
+   arg = &obj->args[i];
+   if (obj->args[i].name_long == NULL)
+   break;
+   if (is_arg_positional(arg))
+   count++;
+   }
+
+   return count;
+}
+
+static struct rte_argparse_arg *
+find_position_arg(struct rte_argparse *obj, uint32_t index)
+{
+   struct rte_argparse_arg *arg;
+   uint32_t count = 0;
+   uint32_t i;
+
+   for (i = 0; /* NULL */; i++) {
+   arg = &obj->args[i];
+   if (arg->name_long == NULL)
+   break;
+   if (!is_arg_positional(arg))
+   continue;
+   count++;
+   if (count == index)
+   return arg;
+   }
+
+   return NULL;
+}
+
+static bool
+is_arg_match(struct rte_argparse_arg *arg, const char *curr_argv, uint32_t len)
+{
+   if (strlen(arg->name_long) == len && strncmp(arg->name_long, curr_argv, 
len) == 0)
+   return true;
+
+   if (arg->name_short == NULL)
+   return false;
+
+   if (strlen(arg->name_short) == len && strncmp(arg->name_short, 
curr_argv, len) == 0)
+   return true;
+
+   return false;
+}
+
+static struct rte_argparse_arg *
+find_option_arg(struct rte_argparse *obj, const char *curr_argv, const char 
*has_equal)
+{
+   uint32_t len = strlen(curr_argv) - (has_equal != NULL ? 
strlen(has_equal) : 0);
+   struct rte_argparse_arg *arg;
+   uint32_t i;
+   bool match;
+
+   for (i = 0; /* nothing */; i++) {
+   arg = &obj->args[i];
+   if (arg->name_long == NULL)
+   break;
+   match = is_arg_match(arg, curr_argv, len);
+   if (match)
+   return arg;
+   }
+
+   return NULL;
+}
+
+static int
+parse_arg_int(struct rte_argparse_arg *arg, const char *value)
+{
+   char *s = NULL;
+
+   if (value == NULL) {
+   *(int *)arg->val_saver = (int)(intptr_t)arg->val_set;
+   return 0;
+   }
+
+   errno = 0;
+   *(int *)arg->val_saver = strtol(value, &s, 0);
+   if (errno == ERANGE) {
+   ARGPARSE_LOG(ERR, "argument %s numerical out of range!", 
arg->name_long);
+   return -EINVAL;
+   }
+
+   if (s[0] != '\0') {
+   ARGPARSE_LOG(ERR, "argument %s expect an integer value!", 
arg->name_long);
+   return -EINVAL;
+   }
+
+   return 0;
+}
+
+static int
+parse_arg_autosave(struct rte_argparse_arg *arg, const char *value)
+{
+   static struct {
+   int (*f_parse_type)(struct rte_argparse_arg *arg, const char 
*value);
+   } map[] = {
+   /* Sort by RTE_ARGPARSE_ARG_VALUE_XXX. */
+   { NULL  },
+   { parse_arg_int },
+   };
+   uint32_t index = arg_attr_val_type(arg);
+   int ret = -EINVAL;
+
+   if (index > 0 && index < RTE_DIM(map))
+   ret = map[index].f_parse_type(arg, value);
+
+   return ret;
+}
+
+static int
+parse_arg_val(struct rte_argparse *obj, struct rte_argparse_arg *arg, char 
*value)
+{
+   int ret;
+
+   if (arg->val_saver == NULL)
+   ret = obj->callback((uint32_t)(uintptr_t)arg->val_set, value, 
obj->opaque);
+   else
+   ret = parse_arg_autosave(arg, value);
+   if (ret != 0) {
+   ARGPARSE_LOG(ERR, "argument %s parse value fail!", 
arg->name_long);
+   return ret;
+   }
+
+   return 0;
+}
+
+static bool
+is_help(const char *curr_argv)
+{
+   return strcmp(curr_argv, "-h") == 0 || strcmp(curr_argv, "--help") == 0;
+}
+
+static int
+parse_args(struct rte_argparse *obj, int argc, char **argv, bool *show_help)
+{
+   uint32_t position_count = calc_position_count(obj);
+   struct rte_argparse_arg *arg;
+   uint32_t position_index = 0;
+   char *curr_argv;
+   char *has_equal;
+   char *value;
+   int ret;
+   int i;
+
+   for (i = 1; i < argc; i++) {
+   curr_argv = argv[i];
+   if (curr_argv[0] != '-') {
+   /* process positional parameters. */
+   position_index++;
+   if (position_index > pos

[RFC v3 01/12] eal: introduce more macro for bit definition

2023-12-11 Thread Chengwen Feng
Introduce macros: RTE_MBIT64/RTE_MBIT32, RTE_GENMASK64/RTE_GENMASK32,
and RTE_FIELD_GET64/RTE_FIELD_GET32.

Signed-off-by: Chengwen Feng 
---
 lib/eal/include/rte_bitops.h | 64 
 1 file changed, 64 insertions(+)

diff --git a/lib/eal/include/rte_bitops.h b/lib/eal/include/rte_bitops.h
index 6bd8bae21a..e1f3c4b195 100644
--- a/lib/eal/include/rte_bitops.h
+++ b/lib/eal/include/rte_bitops.h
@@ -39,6 +39,70 @@ extern "C" {
  */
 #define RTE_BIT32(nr) (UINT32_C(1) << (nr))
 
+/**
+ * Get the uint64_t value for a multiple bits set.
+ *
+ * @param val
+ *   The value may not all 1s.
+ * @param nr
+ *   The bit number in range of 0 to (64 - width of val).
+ */
+#define RTE_MBIT64(val, nr) (UINT64_C(val) << (nr))
+
+/**
+ * Get the uint32_t value for a multiple bits set.
+ *
+ * @param val
+ *   The value may not all 1s.
+ * @param nr
+ *   The bit number in range of 0 to (32 - width of val).
+ */
+#define RTE_MBIT32(val, nr) (UINT32_C(val) << (nr))
+
+/**
+ * Generate a contiguous 64bit bitmask starting at bit position low
+ * and ending at position high.
+ *
+ * @param high
+ *   High bit position.
+ * @param low
+ *   Low bit position.
+ */
+#define RTE_GENMASK64(high, low) (((~UINT64_C(0)) << (low)) & (~UINT64_C(0) >> 
(63u - (high
+
+/**
+ * Generate a contiguous 32bit bitmask starting at bit position low
+ * and ending at position high.
+ *
+ * @param high
+ *   High bit position.
+ * @param low
+ *   Low bit position.
+ */
+#define RTE_GENMASK32(high, low) (((~UINT32_C(0)) << (low)) & (~UINT32_C(0) >> 
(31u - (high
+
+/**
+ * Extract a 64bit field element.
+ *
+ * @param mask
+ *   shifted mask.
+ * @param reg
+ *   value of entire bitfield.
+ */
+#define RTE_FIELD_GET64(mask, reg) \
+   (typeof(mask))(((reg) & (mask)) >> rte_ctz64(mask))
+
+/**
+ * Extract a 32bit field element.
+ *
+ * @param mask
+ *   shifted mask.
+ * @param reg
+ *   value of entire bitfield.
+ */
+#define RTE_FIELD_GET32(mask, reg) \
+   (typeof(mask))(((reg) & (mask)) >> rte_ctz32(mask))
+
 /* 32-bit relaxed operations */
 
 /**
-- 
2.17.1



[RFC v3 08/12] test/argparse: add parse type test

2023-12-11 Thread Chengwen Feng
This commit adds a parse-type test.

Signed-off-by: Chengwen Feng 
---
 app/test/test_argparse.c | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/app/test/test_argparse.c b/app/test/test_argparse.c
index 3e4f4a2cfa..06336714d9 100644
--- a/app/test/test_argparse.c
+++ b/app/test/test_argparse.c
@@ -729,6 +729,27 @@ test_argparse_pos_callback_parse_int(void)
return 0;
 }
 
+static int
+test_argparse_parse_type(void)
+{
+   char *str_erange = test_strdup("99");
+   char *str_invalid = test_strdup("1a");
+   char *str_ok = test_strdup("123");
+   int value;
+   int ret;
+
+   /* test for int parsing */
+   ret = rte_argparse_parse_type(str_erange, RTE_ARGPARSE_ARG_VALUE_INT, 
&value);
+   TEST_ASSERT(ret != 0, "Argparse parse type expect failed!");
+   ret = rte_argparse_parse_type(str_invalid, RTE_ARGPARSE_ARG_VALUE_INT, 
&value);
+   TEST_ASSERT(ret != 0, "Argparse parse type expect failed!");
+   ret = rte_argparse_parse_type(str_ok, RTE_ARGPARSE_ARG_VALUE_INT, 
&value);
+   TEST_ASSERT(ret == 0, "Argparse parse type expect failed!");
+   TEST_ASSERT(value == 123, "Argparse parse type expect failed!");
+
+   return 0;
+}
+
 static struct unit_test_suite argparse_test_suite  = {
.suite_name = "Argparse Unit Test Suite",
.setup = test_argparse_setup,
@@ -750,6 +771,7 @@ static struct unit_test_suite argparse_test_suite  = {
TEST_CASE(test_argparse_opt_callback_parse_int_of_optional_val),
TEST_CASE(test_argparse_pos_autosave_parse_int),
TEST_CASE(test_argparse_pos_callback_parse_int),
+   TEST_CASE(test_argparse_parse_type),
 
TEST_CASES_END() /**< NULL terminate unit test array */
}
-- 
2.17.1



[RFC v3 04/12] test/argparse: add verify argument config test

2023-12-11 Thread Chengwen Feng
This commit adds a test for verifying the argument configuration.

Signed-off-by: Chengwen Feng 
---
 app/test/meson.build |   1 +
 app/test/test_argparse.c | 327 +++
 2 files changed, 328 insertions(+)
 create mode 100644 app/test/test_argparse.c

diff --git a/app/test/meson.build b/app/test/meson.build
index dcc93f4a43..864b79d39f 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -27,6 +27,7 @@ source_file_deps = {
 # the various test_*.c files
 'test_acl.c': ['net', 'acl'],
 'test_alarm.c': [],
+'test_argparse.c': ['argparse'],
 'test_atomic.c': ['hash'],
 'test_barrier.c': [],
 'test_bitcount.c': [],
diff --git a/app/test/test_argparse.c b/app/test/test_argparse.c
new file mode 100644
index 00..d38ffb5775
--- /dev/null
+++ b/app/test/test_argparse.c
@@ -0,0 +1,327 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 HiSilicon Limited
+ */
+
+#include 
+#include 
+
+#include 
+
+#include "test.h"
+
+static int default_argc;
+static char *default_argv[1];
+
+/*
+ * Define strdup wrapper.
+ * 1. Mainly to fix compile error "warning: assignment discards 'const'
+ *qualifier from pointer target type [-Wdiscarded-qualifiers]" for
+ *following code:
+ *  argv[x] = "100";
+ * 2. Because this is a test, the memory release which allocated by this
+ *wrapper in the subtest is not considered.
+ */
+static char *
+test_strdup(const char *str)
+{
+   char *s = strdup(str);
+   if (s == NULL)
+   exit(-ENOMEM);
+   return s;
+}
+
+static int
+test_argparse_setup(void)
+{
+   default_argc = 1;
+   default_argv[0] = test_strdup("test_argparse");
+   return 0;
+}
+
+static void
+test_argparse_teardown(void)
+{
+   free(default_argv[0]);
+}
+
+static int
+test_argparse_callback(uint32_t index, const char *value, void *opaque)
+{
+   RTE_SET_USED(index);
+   RTE_SET_USED(value);
+   RTE_SET_USED(opaque);
+   return 0;
+}
+
+/* valid templater, must contain at least two args. */
+#define argparse_templater() { \
+   .prog_name = "test_argparse", \
+   .usage = "-a xx -b yy", \
+   .descriptor = NULL, \
+   .epilog = NULL, \
+   .exit_on_error = false, \
+   .callback = test_argparse_callback, \
+   .args = { \
+   { "--abc", "-a", "abc argument", (void *)1, (void *)1, 
RTE_ARGPARSE_ARG_NO_VALUE | RTE_ARGPARSE_ARG_VALUE_INT }, \
+   { "--xyz", "-x", "xyz argument", (void *)1, (void *)2, 
RTE_ARGPARSE_ARG_NO_VALUE | RTE_ARGPARSE_ARG_VALUE_INT }, \
+   ARGPARSE_ARG_END(), \
+   }, \
+}
+
+static void
+test_argparse_copy(struct rte_argparse *dst, struct rte_argparse *src)
+{
+   uint32_t i;
+   memcpy(dst, src, sizeof(*src));
+   for (i = 0; /* NULL */; i++) {
+   memcpy(&dst->args[i], &src->args[i], sizeof(src->args[i]));
+   if (src->args[i].name_long == NULL)
+   break;
+   }
+}
+
+static struct rte_argparse *
+test_argparse_init_obj(void)
+{
+   static struct rte_argparse backup = argparse_templater();
+   static struct rte_argparse obj = argparse_templater();
+   test_argparse_copy(&obj, &backup);
+   return &obj;
+}
+
+static int
+test_argparse_invalid_basic_param(void)
+{
+   struct rte_argparse *obj;
+   int ret;
+
+   obj = test_argparse_init_obj();
+   obj->prog_name = NULL;
+   ret = rte_argparse_parse(obj, default_argc, default_argv);
+   TEST_ASSERT(ret == -EINVAL, "Argparse parse expect failed!");
+
+   obj = test_argparse_init_obj();
+   obj->usage = NULL;
+   ret = rte_argparse_parse(obj, default_argc, default_argv);
+   TEST_ASSERT(ret == -EINVAL, "Argparse parse expect failed!");
+
+   return TEST_SUCCESS;
+}
+
+static int
+test_argparse_invalid_arg_name(void)
+{
+   struct rte_argparse *obj;
+   int ret;
+
+   obj = test_argparse_init_obj();
+   obj->args[0].name_long = "-ab";
+   ret = rte_argparse_parse(obj, default_argc, default_argv);
+   TEST_ASSERT(ret == -EINVAL, "Argparse parse expect failed!");
+
+   obj = test_argparse_init_obj();
+   obj->args[0].name_long = "-abc";
+   ret = rte_argparse_parse(obj, default_argc, default_argv);
+   TEST_ASSERT(ret == -EINVAL, "Argparse parse expect failed!");
+
+   obj = test_argparse_init_obj();
+   obj->args[0].name_long = "---c";
+   ret = rte_argparse_parse(obj, default_argc, default_argv);
+   TEST_ASSERT(ret == -EINVAL, "Argparse parse expect failed!");
+
+   obj = test_argparse_init_obj();
+   obj->args[0].name_long = "abc";
+   obj->args[0].name_short = "-a";
+   ret = rte_argparse_parse(obj, default_argc, default_argv);
+   TEST_ASSERT(ret == -EINVAL, "Argparse parse expect failed!");
+
+   obj = test_argparse_init_obj();
+   obj->args[0].name_short = "a";
+   ret = rte_argparse_parse(obj, default_argc, default_argv);
+   TEST_

[RFC v3 09/12] argparse: support parse unsigned base type

2023-12-11 Thread Chengwen Feng
This commit supports parsing unsigned base type (u8/u16/u32/u64).

Signed-off-by: Chengwen Feng 
---
 lib/argparse/rte_argparse.c | 116 
 lib/argparse/rte_argparse.h |  10 +++-
 2 files changed, 125 insertions(+), 1 deletion(-)

diff --git a/lib/argparse/rte_argparse.c b/lib/argparse/rte_argparse.c
index 6b82f58eaf..bc5b9e87c1 100644
--- a/lib/argparse/rte_argparse.c
+++ b/lib/argparse/rte_argparse.c
@@ -397,6 +397,118 @@ parse_arg_int(struct rte_argparse_arg *arg, const char 
*value)
return 0;
 }
 
+static int
+parse_arg_u8(struct rte_argparse_arg *arg, const char *value)
+{
+   unsigned long val;
+   char *s = NULL;
+
+   if (value == NULL) {
+   *(uint8_t *)arg->val_saver = (uint8_t)(intptr_t)arg->val_set;
+   return 0;
+   }
+
+   errno = 0;
+   val = strtoul(value, &s, 0);
+   if (errno == ERANGE || val > UINT8_MAX) {
+   ARGPARSE_LOG(ERR, "argument %s numerical out of range!", 
arg->name_long);
+   return -EINVAL;
+   }
+
+   if (s[0] != '\0') {
+   ARGPARSE_LOG(ERR, "argument %s expect an uint8 value!", 
arg->name_long);
+   return -EINVAL;
+   }
+
+   *(uint8_t *)arg->val_saver = val;
+
+   return 0;
+}
+
+static int
+parse_arg_u16(struct rte_argparse_arg *arg, const char *value)
+{
+   unsigned long val;
+   char *s = NULL;
+
+   if (value == NULL) {
+   *(uint16_t *)arg->val_saver = (uint16_t)(intptr_t)arg->val_set;
+   return 0;
+   }
+
+   errno = 0;
+   val = strtoul(value, &s, 0);
+   if (errno == ERANGE || val > UINT16_MAX) {
+   ARGPARSE_LOG(ERR, "argument %s numerical out of range!", 
arg->name_long);
+   return -EINVAL;
+   }
+
+   if (s[0] != '\0') {
+   ARGPARSE_LOG(ERR, "argument %s expect an uint16 value!", 
arg->name_long);
+   return -EINVAL;
+   }
+
+   *(uint16_t *)arg->val_saver = val;
+
+   return 0;
+}
+
+static int
+parse_arg_u32(struct rte_argparse_arg *arg, const char *value)
+{
+   unsigned long val;
+   char *s = NULL;
+
+   if (value == NULL) {
+   *(uint32_t *)arg->val_saver = (uint32_t)(intptr_t)arg->val_set;
+   return 0;
+   }
+
+   errno = 0;
+   val = strtoul(value, &s, 0);
+   if (errno == ERANGE || val > UINT32_MAX) {
+   ARGPARSE_LOG(ERR, "argument %s numerical out of range!", 
arg->name_long);
+   return -EINVAL;
+   }
+
+   if (s[0] != '\0') {
+   ARGPARSE_LOG(ERR, "argument %s expect an uint32 value!", 
arg->name_long);
+   return -EINVAL;
+   }
+
+   *(uint32_t *)arg->val_saver = val;
+
+   return 0;
+}
+
+static int
+parse_arg_u64(struct rte_argparse_arg *arg, const char *value)
+{
+   unsigned long val;
+   char *s = NULL;
+
+   if (value == NULL) {
+   *(uint64_t *)arg->val_saver = (uint64_t)(intptr_t)arg->val_set;
+   return 0;
+   }
+
+   errno = 0;
+   val = strtoull(value, &s, 0);
+   if (errno == ERANGE) {
+   ARGPARSE_LOG(ERR, "argument %s numerical out of range!", 
arg->name_long);
+   return -EINVAL;
+   }
+
+   if (s[0] != '\0') {
+   ARGPARSE_LOG(ERR, "argument %s expect an uint64 value!", 
arg->name_long);
+   return -EINVAL;
+   }
+
+   *(uint64_t *)arg->val_saver = val;
+
+   return 0;
+}
+
 static int
 parse_arg_autosave(struct rte_argparse_arg *arg, const char *value)
 {
@@ -406,6 +518,10 @@ parse_arg_autosave(struct rte_argparse_arg *arg, const 
char *value)
/* Sort by RTE_ARGPARSE_ARG_VALUE_XXX. */
{ NULL  },
{ parse_arg_int },
+   { parse_arg_u8  },
+   { parse_arg_u16 },
+   { parse_arg_u32 },
+   { parse_arg_u64 },
};
uint32_t index = arg_attr_val_type(arg);
int ret = -EINVAL;
diff --git a/lib/argparse/rte_argparse.h b/lib/argparse/rte_argparse.h
index 5e40431e5b..89f4bda698 100644
--- a/lib/argparse/rte_argparse.h
+++ b/lib/argparse/rte_argparse.h
@@ -59,8 +59,16 @@ enum rte_argparse_flag {
 
/** The argument's value is int type. */
RTE_ARGPARSE_ARG_VALUE_INT = RTE_MBIT64(1, 2),
+   /** The argument's value is uint8 type. */
+   RTE_ARGPARSE_ARG_VALUE_U8 = RTE_MBIT64(2, 2),
+   /** The argument's value is uint16 type. */
+   RTE_ARGPARSE_ARG_VALUE_U16 = RTE_MBIT64(3, 2),
+   /** The argument's value is uint32 type. */
+   RTE_ARGPARSE_ARG_VALUE_U32 = RTE_MBIT64(4, 2),
+   /** The argument's value is uint64 type. */
+   RTE_ARGPARSE_ARG_VALUE_U64 = RTE_MBIT64(5, 2),
/** Max value type. */
-   RTE_ARGPARSE_ARG_VALUE_MAX = RTE_MBIT64(2, 2),
+   RTE_ARGPARSE_ARG_VALUE_MAX = RTE_MBIT64(6, 2),
 
 
/**
-- 
2.17.1



[RFC v3 06/12] test/argparse: add parse parameters test

2023-12-11 Thread Chengwen Feng
This commit adds a test for parsing parameters.

Signed-off-by: Chengwen Feng 
---
 app/test/test_argparse.c | 437 +++
 1 file changed, 437 insertions(+)

diff --git a/app/test/test_argparse.c b/app/test/test_argparse.c
index d38ffb5775..3e4f4a2cfa 100644
--- a/app/test/test_argparse.c
+++ b/app/test/test_argparse.c
@@ -301,6 +301,434 @@ test_argparse_invalid_arg_repeat(void)
return 0;
 }
 
+static int
+test_argparse_invalid_option(void)
+{
+   struct rte_argparse *obj;
+   char *argv[2];
+   int ret;
+
+   obj = test_argparse_init_obj();
+   argv[0] = test_strdup(obj->usage);
+   argv[1] = test_strdup("--invalid");
+   ret = rte_argparse_parse(obj, 2, argv);
+   TEST_ASSERT(ret == -EINVAL, "Argparse parse expect failed!");
+
+   obj = test_argparse_init_obj();
+   argv[0] = test_strdup(obj->usage);
+   argv[1] = test_strdup("invalid");
+   ret = rte_argparse_parse(obj, 2, argv);
+   TEST_ASSERT(ret == -EINVAL, "Argparse parse expect failed!");
+
+   return 0;
+}
+
+static int
+test_argparse_opt_autosave_parse_int_of_no_val(void)
+{
+   uint32_t flags = RTE_ARGPARSE_ARG_NO_VALUE | RTE_ARGPARSE_ARG_VALUE_INT;
+   struct rte_argparse *obj;
+   int val_saver = 0;
+   char *argv[2];
+   int ret;
+
+   obj = test_argparse_init_obj();
+   obj->args[0].name_long = "--test-long";
+   obj->args[0].name_short = "-t";
+   obj->args[0].val_saver = (void *)&val_saver;
+   obj->args[0].val_set = (void *)100;
+   obj->args[0].flags = flags;
+   obj->args[1].name_long = NULL;
+   argv[0] = test_strdup(obj->usage);
+   argv[1] = test_strdup("--test-long");
+   ret = rte_argparse_parse(obj, 2, argv);
+   TEST_ASSERT(ret == 0, "Argparse parse expect success!");
+   TEST_ASSERT(val_saver == 100, "Argparse parse expect success!");
+
+   obj->args[0].flags = flags;
+   val_saver = 0;
+   argv[1] = test_strdup("-t");
+   ret = rte_argparse_parse(obj, 2, argv);
+   TEST_ASSERT(ret == 0, "Argparse parse expect success!");
+   TEST_ASSERT(val_saver == 100, "Argparse parse expect success!");
+
+   return 0;
+}
+
+static int
+test_argparse_opt_autosave_parse_int_of_required_val(void)
+{
+   uint32_t flags = RTE_ARGPARSE_ARG_REQUIRED_VALUE | 
RTE_ARGPARSE_ARG_VALUE_INT;
+   struct rte_argparse *obj;
+   int val_saver = 0;
+   char *argv[3];
+   int ret;
+
+   obj = test_argparse_init_obj();
+   obj->args[0].name_long = "--test-long";
+   obj->args[0].name_short = "-t";
+   obj->args[0].val_saver = (void *)&val_saver;
+   obj->args[0].val_set = NULL;
+   obj->args[0].flags = flags;
+   obj->args[1].name_long = NULL;
+   argv[0] = test_strdup(obj->usage);
+   argv[1] = test_strdup("--test-long");
+   argv[2] = test_strdup("100");
+   ret = rte_argparse_parse(obj, 3, argv);
+   TEST_ASSERT(ret == 0, "Argparse parse expect success!");
+   TEST_ASSERT(val_saver == 100, "Argparse parse expect success!");
+
+   obj->args[0].flags = flags;
+   val_saver = 0;
+   argv[1] = test_strdup("-t");
+   ret = rte_argparse_parse(obj, 3, argv);
+   TEST_ASSERT(ret == 0, "Argparse parse expect success!");
+   TEST_ASSERT(val_saver == 100, "Argparse parse expect success!");
+
+   /* test invalid value. */
+   obj->args[0].flags = flags;
+   val_saver = 0;
+   argv[1] = test_strdup("-t");
+   argv[2] = test_strdup("100a");
+   ret = rte_argparse_parse(obj, 3, argv);
+   TEST_ASSERT(ret == -EINVAL, "Argparse parse expect failed!");
+
+   return 0;
+}
+
+static int
+test_argparse_opt_autosave_parse_int_of_optional_val(void)
+{
+   uint32_t flags = RTE_ARGPARSE_ARG_OPTIONAL_VALUE | 
RTE_ARGPARSE_ARG_VALUE_INT;
+   struct rte_argparse *obj;
+   int val_saver = 0;
+   char *argv[2];
+   int ret;
+
+   obj = test_argparse_init_obj();
+   obj->args[0].name_long = "--test-long";
+   obj->args[0].name_short = "-t";
+   obj->args[0].val_saver = (void *)&val_saver;
+   obj->args[0].val_set = (void *)100;
+   obj->args[0].flags = flags;
+   obj->args[1].name_long = NULL;
+   argv[0] = test_strdup(obj->usage);
+   argv[1] = test_strdup("--test-long");
+   ret = rte_argparse_parse(obj, 2, argv);
+   TEST_ASSERT(ret == 0, "Argparse parse expect success!");
+   TEST_ASSERT(val_saver == 100, "Argparse parse expect success!");
+   obj->args[0].flags = flags;
+   val_saver = 0;
+   argv[1] = test_strdup("-t");
+   ret = rte_argparse_parse(obj, 2, argv);
+   TEST_ASSERT(ret == 0, "Argparse parse expect success!");
+   TEST_ASSERT(val_saver == 100, "Argparse parse expect success!");
+
+   /* test with value. */
+   obj->args[0].flags = flags;
+   val_saver = 0;
+   argv[1] = test_strdup("--test-long=200");
+   ret = rte_argparse_parse(obj, 2, argv);
+  

[RFC v3 02/12] argparse: add argparse library

2023-12-11 Thread Chengwen Feng
Introduce argparse library (which was inspired by the thread [1]). This
commit provides public API and doc.

[1] 
https://patchwork.dpdk.org/project/dpdk/patch/20231105054539.22303-2-fengcheng...@huawei.com/

Signed-off-by: Chengwen Feng 
---
 doc/api/doxy-api-index.md  |   1 +
 doc/api/doxy-api.conf.in   |   1 +
 doc/guides/prog_guide/argparse_lib.rst | 141 ++
 doc/guides/prog_guide/index.rst|   1 +
 lib/argparse/meson.build   |   7 +
 lib/argparse/rte_argparse.c|  14 ++
 lib/argparse/rte_argparse.h| 191 +
 lib/argparse/version.map   |   7 +
 lib/meson.build|   1 +
 9 files changed, 364 insertions(+)
 create mode 100644 doc/guides/prog_guide/argparse_lib.rst
 create mode 100644 lib/argparse/meson.build
 create mode 100644 lib/argparse/rte_argparse.c
 create mode 100644 lib/argparse/rte_argparse.h
 create mode 100644 lib/argparse/version.map

diff --git a/doc/api/doxy-api-index.md b/doc/api/doxy-api-index.md
index a6a768bd7c..fe41fba6ec 100644
--- a/doc/api/doxy-api-index.md
+++ b/doc/api/doxy-api-index.md
@@ -220,6 +220,7 @@ The public API headers are grouped by topics:
   [random](@ref rte_random.h),
   [config file](@ref rte_cfgfile.h),
   [key/value args](@ref rte_kvargs.h),
+  [argument parse](@ref rte_argparse.h),
   [string](@ref rte_string_fns.h),
   [thread](@ref rte_thread.h)
 
diff --git a/doc/api/doxy-api.conf.in b/doc/api/doxy-api.conf.in
index e94c9e4e46..76f89afe71 100644
--- a/doc/api/doxy-api.conf.in
+++ b/doc/api/doxy-api.conf.in
@@ -28,6 +28,7 @@ INPUT   = @TOPDIR@/doc/api/doxy-api-index.md \
   @TOPDIR@/lib/eal/include \
   @TOPDIR@/lib/eal/include/generic \
   @TOPDIR@/lib/acl \
+  @TOPDIR@/lib/argparse \
   @TOPDIR@/lib/bbdev \
   @TOPDIR@/lib/bitratestats \
   @TOPDIR@/lib/bpf \
diff --git a/doc/guides/prog_guide/argparse_lib.rst 
b/doc/guides/prog_guide/argparse_lib.rst
new file mode 100644
index 00..a421fab757
--- /dev/null
+++ b/doc/guides/prog_guide/argparse_lib.rst
@@ -0,0 +1,141 @@
+.. SPDX-License-Identifier: BSD-3-Clause
+   Copyright 2023 HiSilicon Limited
+
+Argparse Library
+
+
+The argparse library provides argument parsing functionality; this library makes
+it easy to write user-friendly command-line programs.
+
+Features and Capabilities
+-
+
+- Supports parsing optional arguments (which can take no-value,
+  required-value or optional-value).
+
+- Supports parsing positional arguments (which must take a required-value).
+
+- Supports automatic generation of usage information.
+
+- Supports reporting errors when invalid arguments are provided.
+
+- Supports two ways of parsing arguments: 1) autosave: can be used for known
+  value types; 2) callback: invokes a user callback to parse the value.
+
+Usage Guide
+---
+
+The following code demonstrates how to initialize:
+
+.. code-block:: C
+
+   static int
+   argparse_user_callback(uint32_t index, const char *value, void *opaque)
+   {
+  if (index == 1) {
+ /* process "--ddd" argument, because it has no-value, the parameter 
value is NULL. */
+ ...
+  } else if (index == 2) {
+ /* process "--eee" argument, because it has required-value, the 
parameter value must not NULL. */
+ ...
+  } else if (index == 3) {
+ /* process "--fff" argument, because it has optional-value, the 
parameter value maybe NULL or not NULL, depend on input. */
+ ...
+  } else if (index == 300) {
+ /* process "ppp" argument, because it's a positional argument, the 
parameter value must not NULL. */
+ ...
+  } else {
+ return -EINVAL;
+  }
+   }
+
+   int aaa_val, bbb_val, ccc_val, ooo_val;
+
+   static struct rte_argparse obj = {
+  .prog_name = "test-demo",
+  .usage = "[EAL options] -- [optional parameters] [positional 
parameters]",
+  .descriptor = NULL,
+  .epilog = NULL,
+  .exit_on_error = true,
+  .callback = argparse_user_callback,
+  .args = {
+ { "--aaa", "-a", "aaa argument", &aaa_val, (void *)100, 
RTE_ARGPARSE_ARG_NO_VALUE   | RTE_ARGPARSE_ARG_VALUE_INT },
+ { "--bbb", "-b", "bbb argument", &bbb_val, NULL,
RTE_ARGPARSE_ARG_REQUIRED_VALUE | RTE_ARGPARSE_ARG_VALUE_INT },
+ { "--ccc", "-c", "ccc argument", &ccc_val, (void *)200, 
RTE_ARGPARSE_ARG_OPTIONAL_VALUE | RTE_ARGPARSE_ARG_VALUE_INT },
+ { "--ddd", "-d", "ddd argument", NULL, (void *)1,   
RTE_ARGPARSE_ARG_NO_VALUE   },
+ { "--eee", "-e", "eee argument", NULL, (void *)2,   
RTE_ARGPARSE_ARG_REQUIRED_VALUE },
+ { "--fff", "-f", "fff argument", NULL, (void *)3,   
RTE_ARGPARSE_ARG_OPTIONAL_VALUE },
+ { "ooo",  

[RFC v3 12/12] examples/dma: replace getopt with argparse

2023-12-11 Thread Chengwen Feng
Replace getopt with argparse.

Signed-off-by: Chengwen Feng 
---
 examples/dma/dmafwd.c| 279 ++-
 examples/dma/meson.build |   2 +-
 2 files changed, 127 insertions(+), 154 deletions(-)

diff --git a/examples/dma/dmafwd.c b/examples/dma/dmafwd.c
index f27317a622..4cc0913240 100644
--- a/examples/dma/dmafwd.c
+++ b/examples/dma/dmafwd.c
@@ -4,11 +4,11 @@
 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
 
+#include 
 #include 
 #include 
 #include 
@@ -18,16 +18,18 @@
 #define MAX_PKT_BURST 32
 #define MEMPOOL_CACHE_SIZE 512
 #define MIN_POOL_SIZE 65536U
-#define CMD_LINE_OPT_MAC_UPDATING "mac-updating"
-#define CMD_LINE_OPT_NO_MAC_UPDATING "no-mac-updating"
-#define CMD_LINE_OPT_PORTMASK "portmask"
-#define CMD_LINE_OPT_NB_QUEUE "nb-queue"
-#define CMD_LINE_OPT_COPY_TYPE "copy-type"
-#define CMD_LINE_OPT_RING_SIZE "ring-size"
-#define CMD_LINE_OPT_BATCH_SIZE "dma-batch-size"
-#define CMD_LINE_OPT_FRAME_SIZE "max-frame-size"
-#define CMD_LINE_OPT_FORCE_COPY_SIZE "force-min-copy-size"
-#define CMD_LINE_OPT_STATS_INTERVAL "stats-interval"
+#define CMD_LINE_OPT_MAC_UPDATING "--mac-updating"
+#define CMD_LINE_OPT_NO_MAC_UPDATING "--no-mac-updating"
+#define CMD_LINE_OPT_PORTMASK "--portmask"
+#define CMD_LINE_OPT_PORTMASK_INDEX 1
+#define CMD_LINE_OPT_NB_QUEUE "--nb-queue"
+#define CMD_LINE_OPT_COPY_TYPE "--copy-type"
+#define CMD_LINE_OPT_COPY_TYPE_INDEX 2
+#define CMD_LINE_OPT_RING_SIZE "--ring-size"
+#define CMD_LINE_OPT_BATCH_SIZE "--dma-batch-size"
+#define CMD_LINE_OPT_FRAME_SIZE "--max-frame-size"
+#define CMD_LINE_OPT_FORCE_COPY_SIZE "--force-min-copy-size"
+#define CMD_LINE_OPT_STATS_INTERVAL "--stats-interval"
 
 /* configurable number of RX/TX ring descriptors */
 #define RX_DEFAULT_RINGSIZE 1024
@@ -95,10 +97,10 @@ static copy_mode_t copy_mode = COPY_MODE_DMA_NUM;
 /* size of descriptor ring for hardware copy mode or
  * rte_ring for software copy mode
  */
-static unsigned short ring_size = 2048;
+static uint16_t ring_size = 2048;
 
 /* interval, in seconds, between stats prints */
-static unsigned short stats_interval = 1;
+static uint16_t stats_interval = 1;
 /* global mbuf arrays for tracking DMA bufs */
 #define MBUF_RING_SIZE 2048
 #define MBUF_RING_MASK (MBUF_RING_SIZE - 1)
@@ -583,26 +585,6 @@ static void start_forwarding_cores(void)
 }
 /* >8 End of starting to process for each lcore. */
 
-/* Display usage */
-static void
-dma_usage(const char *prgname)
-{
-   printf("%s [EAL options] -- -p PORTMASK [-q NQ]\n"
-   "  -b --dma-batch-size: number of requests per DMA batch\n"
-   "  -f --max-frame-size: max frame size\n"
-   "  -m --force-min-copy-size: force a minimum copy length, even 
for smaller packets\n"
-   "  -p --portmask: hexadecimal bitmask of ports to configure\n"
-   "  -q NQ: number of RX queues per port (default is 1)\n"
-   "  --[no-]mac-updating: Enable or disable MAC addresses 
updating (enabled by default)\n"
-   "  When enabled:\n"
-   "   - The source MAC address is replaced by the TX port MAC 
address\n"
-   "   - The destination MAC address is replaced by 
02:00:00:00:00:TX_PORT_ID\n"
-   "  -c --copy-type CT: type of copy: sw|hw\n"
-   "  -s --ring-size RS: size of dmadev descriptor ring for 
hardware copy mode or rte_ring for software copy mode\n"
-   "  -i --stats-interval SI: interval, in seconds, between stats 
prints (default is 1)\n",
-   prgname);
-}
-
 static int
 dma_parse_portmask(const char *portmask)
 {
@@ -628,142 +610,133 @@ dma_parse_copy_mode(const char *copy_mode)
return COPY_MODE_INVALID_NUM;
 }
 
+static int
+dma_parse_args_cb(uint32_t index, const char *value, void *opaque)
+{
+   int port_mask;
+
+   RTE_SET_USED(opaque);
+
+   if (index == CMD_LINE_OPT_PORTMASK_INDEX) {
+   port_mask = dma_parse_portmask(value);
+   if (port_mask & ~dma_enabled_port_mask || port_mask <= 0) {
+   printf("Invalid portmask, %s, suggest 0x%x\n",
+   value, dma_enabled_port_mask);
+   return -1;
+   }
+   dma_enabled_port_mask = port_mask;
+   } else if (index == CMD_LINE_OPT_COPY_TYPE_INDEX) {
+   copy_mode = dma_parse_copy_mode(value);
+   if (copy_mode == COPY_MODE_INVALID_NUM) {
+   printf("Invalid copy type. Use: sw, hw\n");
+   return -1;
+   }
+   } else {
+   printf("Invalid index %u\n", index);
+   return -1;
+   }
+
+   return 0;
+}
+
 /* Parse the argument given in the command line of the application */
 static int
 dma_parse_args(int argc, char **argv, unsigned int nb_ports)
 {
-   static const char short_options[] =
-   "b:"  /* dma batch size */
-

[RFC v3 10/12] test/argparse: add parse unsigned base type test

2023-12-11 Thread Chengwen Feng
This commit adds tests for parsing unsigned base types (u8/u16/u32/u64).

Signed-off-by: Chengwen Feng 
---
 app/test/test_argparse.c | 59 
 1 file changed, 54 insertions(+), 5 deletions(-)

diff --git a/app/test/test_argparse.c b/app/test/test_argparse.c
index 06336714d9..9687c6e733 100644
--- a/app/test/test_argparse.c
+++ b/app/test/test_argparse.c
@@ -733,19 +733,68 @@ static int
 test_argparse_parse_type(void)
 {
char *str_erange = test_strdup("99");
+   char *str_erange_u32 = test_strdup("4294967296");
+   char *str_erange_u16 = test_strdup("65536");
+   char *str_erange_u8 = test_strdup("256");
char *str_invalid = test_strdup("1a");
char *str_ok = test_strdup("123");
-   int value;
+   uint16_t val_u16;
+   uint32_t val_u32;
+   uint64_t val_u64;
+   uint8_t val_u8;
+   int val_int;
int ret;
 
/* test for int parsing */
-   ret = rte_argparse_parse_type(str_erange, RTE_ARGPARSE_ARG_VALUE_INT, 
&value);
+   ret = rte_argparse_parse_type(str_erange, RTE_ARGPARSE_ARG_VALUE_INT, 
&val_int);
TEST_ASSERT(ret != 0, "Argparse parse type expect failed!");
-   ret = rte_argparse_parse_type(str_invalid, RTE_ARGPARSE_ARG_VALUE_INT, 
&value);
+   ret = rte_argparse_parse_type(str_invalid, RTE_ARGPARSE_ARG_VALUE_INT, 
&val_int);
TEST_ASSERT(ret != 0, "Argparse parse type expect failed!");
-   ret = rte_argparse_parse_type(str_ok, RTE_ARGPARSE_ARG_VALUE_INT, 
&value);
+   ret = rte_argparse_parse_type(str_ok, RTE_ARGPARSE_ARG_VALUE_INT, 
&val_int);
TEST_ASSERT(ret == 0, "Argparse parse type expect failed!");
-   TEST_ASSERT(value == 123, "Argparse parse type expect failed!");
+   TEST_ASSERT(val_int == 123, "Argparse parse type expect failed!");
+
+   /* test for u8 parsing */
+   ret = rte_argparse_parse_type(str_erange, RTE_ARGPARSE_ARG_VALUE_U8, 
&val_u8);
+   TEST_ASSERT(ret != 0, "Argparse parse type expect failed!");
+   ret = rte_argparse_parse_type(str_erange_u8, RTE_ARGPARSE_ARG_VALUE_U8, 
&val_u8);
+   TEST_ASSERT(ret != 0, "Argparse parse type expect failed!");
+   ret = rte_argparse_parse_type(str_invalid, RTE_ARGPARSE_ARG_VALUE_U8, 
&val_u8);
+   TEST_ASSERT(ret != 0, "Argparse parse type expect failed!");
+   ret = rte_argparse_parse_type(str_ok, RTE_ARGPARSE_ARG_VALUE_U8, 
&val_u8);
+   TEST_ASSERT(ret == 0, "Argparse parse type expect failed!");
+   TEST_ASSERT(val_u8 == 123, "Argparse parse type expect failed!");
+
+   /* test for u16 parsing */
+   ret = rte_argparse_parse_type(str_erange, RTE_ARGPARSE_ARG_VALUE_U16, 
&val_u16);
+   TEST_ASSERT(ret != 0, "Argparse parse type expect failed!");
+   ret = rte_argparse_parse_type(str_erange_u16, 
RTE_ARGPARSE_ARG_VALUE_U16, &val_u16);
+   TEST_ASSERT(ret != 0, "Argparse parse type expect failed!");
+   ret = rte_argparse_parse_type(str_invalid, RTE_ARGPARSE_ARG_VALUE_U16, 
&val_u16);
+   TEST_ASSERT(ret != 0, "Argparse parse type expect failed!");
+   ret = rte_argparse_parse_type(str_ok, RTE_ARGPARSE_ARG_VALUE_U16, 
&val_u16);
+   TEST_ASSERT(ret == 0, "Argparse parse type expect failed!");
+   TEST_ASSERT(val_u16 == 123, "Argparse parse type expect failed!");
+
+   /* test for u32 parsing */
+   ret = rte_argparse_parse_type(str_erange, RTE_ARGPARSE_ARG_VALUE_U32, 
&val_u32);
+   TEST_ASSERT(ret != 0, "Argparse parse type expect failed!");
+   ret = rte_argparse_parse_type(str_erange_u32, 
RTE_ARGPARSE_ARG_VALUE_U32, &val_u32);
+   TEST_ASSERT(ret != 0, "Argparse parse type expect failed!");
+   ret = rte_argparse_parse_type(str_invalid, RTE_ARGPARSE_ARG_VALUE_U32, 
&val_u32);
+   TEST_ASSERT(ret != 0, "Argparse parse type expect failed!");
+   ret = rte_argparse_parse_type(str_ok, RTE_ARGPARSE_ARG_VALUE_U32, 
&val_u32);
+   TEST_ASSERT(ret == 0, "Argparse parse type expect failed!");
+   TEST_ASSERT(val_u32 == 123, "Argparse parse type expect failed!");
+
+   /* test for u64 parsing */
+   ret = rte_argparse_parse_type(str_erange, RTE_ARGPARSE_ARG_VALUE_U64, 
&val_u64);
+   TEST_ASSERT(ret != 0, "Argparse parse type expect failed!");
+   ret = rte_argparse_parse_type(str_invalid, RTE_ARGPARSE_ARG_VALUE_U64, 
&val_u64);
+   TEST_ASSERT(ret != 0, "Argparse parse type expect failed!");
+   ret = rte_argparse_parse_type(str_ok, RTE_ARGPARSE_ARG_VALUE_U64, 
&val_u64);
+   TEST_ASSERT(ret == 0, "Argparse parse type expect failed!");
+   TEST_ASSERT(val_u64 == 123, "Argparse parse type expect failed!");
 
return 0;
 }
-- 
2.17.1



[RFC v3 11/12] argparse: pretty help info

2023-12-11 Thread Chengwen Feng
This commit aligns help info.

Taking dmafwd as an example, the previous output was:

options:
 -h, --help: show this help message and exit.
 --mac-updating: Enable MAC addresses updating
 --no-mac-updating: Disable MAC addresses updating
 -p, --portmask: hexadecimal bitmask of ports to configure
 -q, --nb-queue: number of RX queues per port (default is 1)
 -c, --copy-type: type of copy: sw|hw
 -s, --ring-size: size of dmadev descriptor ring for hardware copy mode or 
rte_ring for software copy mode
 -b, --dma-batch-size: number of requests per DMA batch
 -f, --max-frame-size: max frame size
 -m, --force-min-copy-size: force a minimum copy length, even for smaller 
packets
 -i, --stats-interval: interval, in seconds, between stats prints (default is 1)

Now:
options:
 -h, --help show this help message and exit.
 --mac-updating Enable MAC addresses updating
 --no-mac-updating  Disable MAC addresses updating
 -p, --portmask hexadecimal bitmask of ports to configure
 -q, --nb-queue number of RX queues per port (default is 1)
 -c, --copy-typetype of copy: sw|hw
 -s, --ring-sizesize of dmadev descriptor ring for hardware copy 
mode or rte_ring for software copy mode
 -b, --dma-batch-size   number of requests per DMA batch
 -f, --max-frame-size   max frame size
 -m, --force-min-copy-size  force a minimum copy length, even for smaller 
packets
 -i, --stats-interval   interval, in seconds, between stats prints (default 
is 1)

Signed-off-by: Chengwen Feng 
---
 lib/argparse/rte_argparse.c | 67 +++--
 1 file changed, 56 insertions(+), 11 deletions(-)

diff --git a/lib/argparse/rte_argparse.c b/lib/argparse/rte_argparse.c
index bc5b9e87c1..3cfa176e9b 100644
--- a/lib/argparse/rte_argparse.c
+++ b/lib/argparse/rte_argparse.c
@@ -634,8 +634,47 @@ parse_args(struct rte_argparse *obj, int argc, char 
**argv, bool *show_help)
return 0;
 }
 
+static uint32_t
+calc_help_align(const struct rte_argparse *obj)
+{
+   const struct rte_argparse_arg *arg;
+   uint32_t width = 12; /* Default "-h, --help  " len. */
+   uint32_t len;
+   uint32_t i;
+
+   for (i = 0; /* NULL */; i++) {
+   arg = &obj->args[i];
+   if (arg->name_long == NULL)
+   break;
+   len = strlen(arg->name_long);
+   if (is_arg_optional(arg) && arg->name_short != NULL) {
+   len += strlen(", ");
+   len += strlen(arg->name_short);
+   }
+   width = RTE_MAX(width, 1 + len + 2); /* start with 1 & end with 
2 space. */
+   }
+
+   return width;
+}
+
+static void
+show_oneline_help(const struct rte_argparse_arg *arg, uint32_t width)
+{
+   uint32_t len = 0;
+   uint32_t i;
+
+   if (arg->name_short != NULL)
+   len = printf(" %s,", arg->name_short);
+   len += printf(" %s", arg->name_long);
+
+   for (i = len; i < width; i++)
+   printf(" ");
+
+   printf("%s\n", arg->help);
+}
+
 static void
-show_args_pos_help(const struct rte_argparse *obj)
+show_args_pos_help(const struct rte_argparse *obj, uint32_t align)
 {
uint32_t position_count = calc_position_count(obj);
const struct rte_argparse_arg *arg;
@@ -651,43 +690,49 @@ show_args_pos_help(const struct rte_argparse *obj)
break;
if (!is_arg_positional(arg))
continue;
-   printf(" %s: %s\n", arg->name_long, arg->help);
+   show_oneline_help(arg, align);
}
 }
 
 static void
-show_args_opt_help(const struct rte_argparse *obj)
+show_args_opt_help(const struct rte_argparse *obj, uint32_t align)
 {
+   static const struct rte_argparse_arg help = {
+   .name_long = "--help",
+   .name_short = "-h",
+   .help = "show this help message and exit.",
+   };
const struct rte_argparse_arg *arg;
uint32_t i;
 
-   printf("\noptions:\n"
-  " -h, --help: show this help message and exit.\n");
+   printf("\noptions:\n");
+   show_oneline_help(&help, align);
for (i = 0; /* NULL */; i++) {
arg = &obj->args[i];
if (arg->name_long == NULL)
break;
if (!is_arg_optional(arg))
continue;
-   if (arg->name_short != NULL)
-   printf(" %s, %s: %s\n", arg->name_short, 
arg->name_long, arg->help);
-   else
-   printf(" %s: %s\n", arg->name_long, arg->help);
+   show_oneline_help(arg, align);
}
 }
 
 static void
 show_args_help(const struct rte_argparse *obj)
 {
+   uint32_t align = calc_help_align(obj);
+
printf("usage: %s %s\n", obj->prog_name, obj->usage);
if (obj->descriptor != NULL)
printf("\ndescriptor: %s\n",

Libtpa: a DPDK based userspace TCP stack implementation

2023-12-11 Thread Yuanhan Liu
Hi all,

I'd like to share a new DPDK open source project, libtpa(Transport
Protocol Acceleration)[0], which is just another userspace TCP stack
implementation so far, written from scratch.

I started this project 3 years ago, while I was searching for a feasible
open source project with no luck. There were indeed quite a few options,
but none of them actually met my needs. I then started writing one. Likely,
there are still other guys out there looking for a high performance and
stable userspace TCP stack. This is what this email and libtpa are for.

Libtpa is fast. To demonstrate that, we did a hacky redis integration. The
benchmark shows that libtpa can boost the performance more than 5 times,
from 0.21m rps to 1.14m rps[1]. Right, it can achieve 1 million rps just
with one CPU thread. Meanwhile, the p99 latency decreases from 0.815ms
to 0.159ms.

Regarding stability, I'd say it's not bad, all thanks to
comprehensive testing. I've written more than 200 tests. Together with
the testing arguments matrix[2], it can result in a big variety of test
cases. Therefore, most of the bugs are captured before deployment.

Having said that, I'd still suggest you do as much testing as you can
if you want to use it, since libtpa is still under active development and
only v1.0-rc0 has been released so far. Tons of changes have been made since
the last stable release.

There is one more thing I'm a bit proud of about libtpa: as a DPDK based
project, libtpa has a rich set of debug tools[3]. The sock tracing is
so handy for debugging that libtpa doesn't ship a tcpdump-like
tool, simply because we don't really need one.

The TCP part then may not sound that exciting. It's basically just an
initial TCP implementation, with the standard congestion avoidance algorithm
(New Reno). Libtpa implements slightly more than that though, such as
SACK, congestion window validation, spurious retransmission detection,
keepalive, etc.

That's all. Comments, questions, patches and testing are all welcome!

Thanks,
Yuanhan Liu

---
[0]: libtpa: https://github.com/bytedance/libtpa
[1]: redis: https://github.com/bytedance/libtpa/tree/main/doc/redis.rst
[2]: matrix shell: 
https://github.com/bytedance/libtpa/tree/main/doc/internals.rst
[3]: user guide: 
https://github.com/bytedance/libtpa/tree/main/doc/user_guide.rst


Re: [PATCH] lib/dmadev: get DMA device using device ID

2023-12-11 Thread Bruce Richardson
On Fri, Dec 08, 2023 at 01:25:25PM +0530, Amit Prakash Shukla wrote:
> DMA library has a function to get DMA device based on device name but
> there is no function to get DMA device using device id.
> 
> Added a function that lookup for the dma device using device id and
> returns the pointer to the same.
> 
> Signed-off-by: Amit Prakash Shukla 
> ---
>  lib/dmadev/rte_dmadev.c |  9 +
>  lib/dmadev/rte_dmadev_pmd.h | 14 ++
>  lib/dmadev/version.map  |  1 +
>  3 files changed, 24 insertions(+)
> 
What is the use-case for these functions? With the dmadev library
abstraction, other libs and apps should never need a pointer to a dmadev
struct.


Huawei Roadmap for DPDK 24.03 release

2023-12-11 Thread Jie Hai



The following are the proposed Huawei roadmap items for DPDK 24.03:

lib
===
1) Introduce argparse library
https://patches.dpdk.org/project/dpdk/patch/20231204075048.894-2-fengcheng...@huawei.com/
2) Introduce memarea library
https://inbox.dpdk.org/dev/20220721044648.6817-1-fengcheng...@huawei.com/

ethdev
==
1) Fix race-condition of proactive error handling mode
https://patches.dpdk.org/project/dpdk/patch/20231106131128.33499-2-fengcheng...@huawei.com/

dmadev
==
1) Support DMA dataplane tracepoint (known issues in 23.11)

driver
==
1) Introduce UACCE bus
https://patches.dpdk.org/project/dpdk/patch/20231208061836.31693-1-fengcheng...@huawei.com/

hns3 driver
===
1) Support monitor addr

Test applications
=
1) testpmd attach/detach on primary and secondary


RE: [EXT] [PATCH] examples/packet_ordering: fix segfault in disable_reorder mode

2023-12-11 Thread Volodymyr Fialko
> -Original Message-
> From: Qian Hao 
> Sent: Friday, December 8, 2023 1:43 PM
> To: dev@dpdk.org
> Cc: Volodymyr Fialko 
> Subject: [EXT] [PATCH] examples/packet_ordering: fix segfault in 
> disable_reorder mode


Good catch overall, but a few comments:

1. Please fix checkpatch coding style issues:
http://patchwork.dpdk.org/project/dpdk/patch/20231208124231.198138-1-qi_an_...@126.com/
Check the DPDK contributing guide to see how to run it locally:
https://doc.dpdk.org/guides/contributing/patches.html#checking-the-patches

2. This patch will add an if check per burst of packets. Even though it will be
easy for the CPU to branch-predict (since this flag does not change), I still
think it would be better to check this condition only once before starting the
rx_thread and let the compiler inline the rest. So something like this:

// mark rx_thread inline with explicit parameter
static __rte_always_inline int
rx_thread(struct rte_ring *ring_out, bool disable_reorder)

// create two separate functions with baked flag
static __rte_noinline int
rx_thread_reorder(struct rte_ring *ring_out)
{
 return rx_thread(ring_out, false);
}
static __rte_noinline int
rx_thread_reorder_disabled(struct rte_ring *ring_out)
{
return rx_thread(ring_out, true);
}

// dispatch only once in main
/* Start rx_thread() on the main core */
if (disable_reorder)
rx_thread_reorder_disabled(rx_to_workers);
else
rx_thread_reorder(rx_to_workers);

/Volodymyr



Re: Libtpa: a DPDK based userspace TCP stack implementation

2023-12-11 Thread Liang Ma
Hi Yuanhan,
   Congratulations!  
Regards
Liang

On Mon, Dec 11, 2023 at 05:56:54PM +0800, Yuanhan Liu wrote:
> Hi all,
> 
> I'd like to share a new DPDK open source project, libtpa(Transport
> Protocol Acceleration)[0], which is just another userspace TCP stack
> implementation so far, written from scratch.
> 
> I started this project 3 years ago, while I was searching for a feasible
> open source project with no luck. There were indeed quite a few options,
> but none of them actually met my needs. I then started writing one. Likely,
> there are still other guys out there looking for a high performance and
> stable userspace TCP stack. This is what this email and libtpa for.
> 
> Libtpa is fast. To demonstrate that, we did a hacky redis integration. The
> benchmark shows that libtpa can boost the performance more than 5 times,
> from 0.21m rps to 1.14m rps[1]. Right, it can achieve 1 million rps just
> with one CPU thread. Meanwhile, the p99 latency decreases from 0.815ms
> to 0.159ms.
> 
> Regarding the stableness, I'd say it's not bad, all kudos to the
> comprehensive testing. I've written more than 200 tests. Together with
> the testing arguments matrix[2], it can result in a big variety of test
> cases. Therefore, most of the bugs are captured before deployment.
> 
> Having said that, I'd still suggest you to do as much testing as you can
> if you want to use it, for libtpa is still under active development and
> it's just v1.0-rc0 being released. Tons of changes have been made since
> the last stable release.
> 
> There is one more thing I'm a bit proud of about libtpa: as a DPDK based
> project, libtpa has rich set of debug tools[3]. The sock tracing is
> particularly handy on debugging that libtpa doesn't ship a tcpdump like
> tool, simply for we don't really need one.
> 
> The TCP part then may not sound that exciting. It's basically just an
> initial TCP implementation, with standard congestion avoid algorithm
> (New Reno). Libtpa implements slightly more than that though, such as
> SACK, congestion window validation, spurious retransmission detection,
> keepalive, etc.
> 
> That's all. Comments, questions, patches and testing are all welcome!
> 
> Thanks,
> Yuanhan Liu
> 
> ---
> [0]: libtpa: https://github.com/bytedance/libtpa
> [1]: redis: https://github.com/bytedance/libtpa/tree/main/doc/redis.rst
> [2]: matrix shell: 
> https://github.com/bytedance/libtpa/tree/main/doc/internals.rst
> [3]: user guide: 
> https://github.com/bytedance/libtpa/tree/main/doc/user_guide.rst


RE: [EXT] Libtpa: a DPDK based userspace TCP stack implementation

2023-12-11 Thread Jerin Jacob Kollanukkaran



> -Original Message-
> From: Yuanhan Liu 
> Sent: Monday, December 11, 2023 3:27 PM
> To: lib...@googlegroups.com
> Cc: dev@dpdk.org; Yuanhan Liu 
> Subject: [EXT] Libtpa: a DPDK based userspace TCP stack implementation
> 
> External Email
> 
> --
> Hi all,
> 
> I'd like to share a new DPDK open source project, libtpa(Transport Protocol
> Acceleration)[0], which is just another userspace TCP stack implementation so
> far, written from scratch.
> 
> I started this project 3 years ago, while I was searching for a feasible open
> source project with no luck. There were indeed quite a few options, but none 
> of
> them actually met my needs. I then started writing one. Likely, there are 
> still
> other guys out there looking for a high performance and stable userspace TCP
> stack. This is what this email and libtpa for.

Great Yuanhan.

If you have time and are willing to put in the effort, I suggest making this
part of the DPDK code base as a new library (tcp or so) and leveraging +
improving other existing libraries such as ip_frag.

I believe that is the only way.
- This code won't soon be outdated by new DPDK versions
- More community review and contributors
- More review and features from NIC vendors PoV.
- More arch and driver support.
- More quality

Just my 2c.



Re: [PATCH] event/cnxk: update base code

2023-12-11 Thread Jerin Jacob
On Fri, Dec 8, 2023 at 9:03 AM  wrote:
>
> From: Pavan Nikhilesh 
>
> Update base code.

Could you split the patches?

>
> Signed-off-by: Pavan Nikhilesh 
> ---
>  drivers/common/cnxk/hw/ssow.h   |   4 +
>  drivers/common/cnxk/hw/tim.h|   5 +-
>  drivers/common/cnxk/roc_mbox.h  |  11 +++
>  drivers/common/cnxk/roc_sso.c   | 123 ++--
>  drivers/common/cnxk/roc_sso.h   |   6 +-
>  drivers/common/cnxk/roc_tim.c   |  27 +-
>  drivers/common/cnxk/roc_tim.h   |   3 +
>  drivers/event/cnxk/cn10k_eventdev.c |   6 +-
>  drivers/event/cnxk/cn9k_eventdev.c  |  18 ++--
>  9 files changed, 162 insertions(+), 41 deletions(-)
>
> diff --git a/drivers/common/cnxk/hw/ssow.h b/drivers/common/cnxk/hw/ssow.h
> index 618ab7973b..c146a8c3ef 100644
> --- a/drivers/common/cnxk/hw/ssow.h
> +++ b/drivers/common/cnxk/hw/ssow.h
> @@ -54,6 +54,8 @@
>  #define SSOW_LF_GWS_OP_SWTAG_FULL1   (0xc28ull)
>  #define SSOW_LF_GWS_OP_GWC_INVAL (0xe00ull)
>
> +#define SSOW_LF_GWS_MAX_NW_TIM_US (0x400) /* [CN9K, CN10K) */
> +
>  /* Enum offsets */
>
>  #define SSOW_LF_INT_VEC_IOP (0x0ull)
> @@ -65,6 +67,8 @@
>  #define SSOW_LF_GWS_TAG_PEND_GET_WORK_BIT 63
>  #define SSOW_LF_GWS_TAG_PEND_SWITCH_BIT  62
>  #define SSOW_LF_GWS_TAG_PEND_DESCHED_BIT  58
> +#define SSOW_LF_GWS_TAG_PEND_FLUSH   56
> +#define SSOW_LF_GWS_TAG_PEND_SWUNT   54
>  #define SSOW_LF_GWS_TAG_HEAD_BIT 35
>
>  #endif /* __SSOW_HW_H__ */
> diff --git a/drivers/common/cnxk/hw/tim.h b/drivers/common/cnxk/hw/tim.h
> index 61c38ae175..82b094e3dc 100644
> --- a/drivers/common/cnxk/hw/tim.h
> +++ b/drivers/common/cnxk/hw/tim.h
> @@ -49,7 +49,8 @@
>  #define TIM_LF_RING_REL   (0x400)
>
>  #define TIM_MAX_INTERVAL_TICKS ((1ULL << 32) - 1)
> -#define TIM_MAX_BUCKET_SIZE((1ULL << 20) - 1)
> -#define TIM_MIN_BUCKET_SIZE3
> +#define TIM_MAX_BUCKET_SIZE((1ULL << 20) - 2)
> +#define TIM_MIN_BUCKET_SIZE1
> +#define TIM_BUCKET_WRAP_SIZE   3
>
>  #endif /* __TIM_HW_H__ */
> diff --git a/drivers/common/cnxk/roc_mbox.h b/drivers/common/cnxk/roc_mbox.h
> index 05434aec5a..4590e5f2dd 100644
> --- a/drivers/common/cnxk/roc_mbox.h
> +++ b/drivers/common/cnxk/roc_mbox.h
> @@ -154,6 +154,8 @@ struct mbox_msghdr {
> M(TIM_DISABLE_RING, 0x804, tim_disable_ring, tim_ring_req, msg_rsp)   
>  \
> M(TIM_GET_MIN_INTVL, 0x805, tim_get_min_intvl, tim_intvl_req, 
>  \
>   tim_intvl_rsp)  
>  \
> +   M(TIM_CAPTURE_COUNTERS, 0x806, tim_capture_counters, msg_req, 
>  \
> + tim_capture_rsp)
>  \
> /* CPT mbox IDs (range 0xA00 - 0xBFF) */  
>  \
> M(CPT_LF_ALLOC, 0xA00, cpt_lf_alloc, cpt_lf_alloc_req_msg, msg_rsp)   
>  \
> M(CPT_LF_FREE, 0xA01, cpt_lf_free, msg_req, msg_rsp)  
>  \
> @@ -2541,6 +2543,10 @@ enum tim_clk_srcs {
> TIM_CLK_SRCS_GPIO = 1,
> TIM_CLK_SRCS_GTI = 2,
> TIM_CLK_SRCS_PTP = 3,
> +   TIM_CLK_SRCS_SYNCE = 4,
> +   TIM_CLK_SRCS_BTS = 5,
> +   TIM_CLK_SRCS_EXT_MIO = 6,
> +   TIM_CLK_SRCS_EXT_GTI = 7,
> TIM_CLK_SRSC_INVALID,
>  };
>
> @@ -2652,6 +2658,11 @@ struct tim_intvl_rsp {
> uint64_t __io intvl_ns;
>  };
>
> +struct tim_capture_rsp {
> +   struct mbox_msghdr hdr;
> +   uint64_t __io counters[TIM_CLK_SRSC_INVALID];
> +};
> +
>  struct sdp_node_info {
> /* Node to which this PF belons to */
> uint8_t __io node_id;
> diff --git a/drivers/common/cnxk/roc_sso.c b/drivers/common/cnxk/roc_sso.c
> index 748d287bad..293b0c81a1 100644
> --- a/drivers/common/cnxk/roc_sso.c
> +++ b/drivers/common/cnxk/roc_sso.c
> @@ -17,6 +17,11 @@ sso_lf_alloc(struct dev *dev, enum sso_lf_type lf_type, 
> uint16_t nb_lf,
> struct mbox *mbox = mbox_get(dev->mbox);
> int rc = -ENOSPC;
>
> +   if (!nb_lf) {
> +   mbox_put(mbox);
> +   return 0;
> +   }
> +
> switch (lf_type) {
> case SSO_LF_TYPE_HWS: {
> struct ssow_lf_alloc_req *req;
> @@ -56,6 +61,11 @@ sso_lf_free(struct dev *dev, enum sso_lf_type lf_type, 
> uint16_t nb_lf)
> struct mbox *mbox = mbox_get(dev->mbox);
> int rc = -ENOSPC;
>
> +   if (!nb_lf) {
> +   mbox_put(mbox);
> +   return 0;
> +   }
> +
> switch (lf_type) {
> case SSO_LF_TYPE_HWS: {
> struct ssow_lf_free_req *req;
> @@ -98,6 +108,11 @@ sso_rsrc_attach(struct roc_sso *roc_sso, enum sso_lf_type 
> lf_type,
> struct rsrc_attach_req *req;
> int rc = -ENOSPC;
>
> +   if (!nb_lf) {
> +   mbox_put(mbox);
> +   return 0;
> +   }
> +
> req = mbox_alloc_msg_attach_resources(mbox);
> if (req == NULL)
> goto exit;
> @@ -220,6 +235,47 @@ sso_hws_link_modify(uint8_t 

Re: [PATCH v3 8/8] examples/l3fwd-power: update to call arg parser API

2023-12-11 Thread Hunt, David

Hi Euan,

On 07/12/2023 16:18, Euan Bourke wrote:

Update to the l3fwd-power example application to call the arg parser
library for its 'combined core string parser' instead of implementing its
own corelist parser. The default_type passed into the function call is
a corelist.

Signed-off-by: Euan Bourke 
---
  examples/l3fwd-power/perf_core.c | 51 +---
  1 file changed, 8 insertions(+), 43 deletions(-)

diff --git a/examples/l3fwd-power/perf_core.c b/examples/l3fwd-power/perf_core.c
index 41ef6d0c9a..f8511e30b3 100644
--- a/examples/l3fwd-power/perf_core.c
+++ b/examples/l3fwd-power/perf_core.c
@@ -12,6 +12,7 @@
  #include 
  #include 
  #include 
+#include 
  
  #include "perf_core.h"

  #include "main.h"
@@ -177,56 +178,20 @@ parse_perf_config(const char *q_arg)
  int
  parse_perf_core_list(const char *corelist)
  {
-   int i, idx = 0;
-   unsigned int count = 0;
-   char *end = NULL;
-   int min, max;
+   int count;
+   uint16_t cores[RTE_MAX_LCORE];
  
  	if (corelist == NULL) {

printf("invalid core list\n");
return -1;
}
  
+	count = rte_arg_parse_core_string(corelist, cores, RTE_DIM(cores), 1);
  
-	/* Remove all blank characters ahead and after */

-   while (isblank(*corelist))
-   corelist++;
-   i = strlen(corelist);
-   while ((i > 0) && isblank(corelist[i - 1]))
-   i--;
+   for (int i = 0; i < count; i++)



nit: you've used int here, but below you use uint16_t for a for loop. If 
you're re-spinning, it might be worth making consistent. But no biggie.


--snip--


@@ -234,7 +199,7 @@ parse_perf_core_list(const char *corelist)
nb_hp_lcores = count;
  
  	printf("Configured %d high performance cores\n", nb_hp_lcores);

-   for (i = 0; i < nb_hp_lcores; i++)
+   for (uint16_t i = 0; i < nb_hp_lcores; i++)
printf("\tHigh performance core %d %d\n",
i, hp_lcores[i]);
  



I've also tested this with a 16-core incantation of l3fwd-power with 
various combinations of cores, seems to work well.


Acked-by: David Hunt 







Re: [PATCH v4 3/3] net/octeon_ep: use AVX2 instructions for Rx

2023-12-11 Thread Jerin Jacob
On Thu, Dec 7, 2023 at 2:03 PM  wrote:
>
> From: Pavan Nikhilesh 
>
> Optimize Rx routine to use AVX2 instructions when underlying
> architecture supports it.
>
> Signed-off-by: Pavan Nikhilesh 
> ---
>  doc/guides/rel_notes/release_24_03.rst |   5 +
>  drivers/net/octeon_ep/cnxk_ep_rx_avx.c | 123 +
>  drivers/net/octeon_ep/meson.build  |  12 +++
>  drivers/net/octeon_ep/otx_ep_ethdev.c  |  10 ++
>  drivers/net/octeon_ep/otx_ep_rxtx.h|   6 ++
>  5 files changed, 156 insertions(+)
>  create mode 100644 drivers/net/octeon_ep/cnxk_ep_rx_avx.c
>
> diff --git a/doc/guides/rel_notes/release_24_03.rst 
> b/doc/guides/rel_notes/release_24_03.rst
> index 6f8ad27808..2191dd78e7 100644
> --- a/doc/guides/rel_notes/release_24_03.rst
> +++ b/doc/guides/rel_notes/release_24_03.rst
> @@ -55,6 +55,11 @@ New Features
>   Also, make sure to start the actual text at the margin.
>   ===
>
> +* **Updated Marvell Octeon ep driver.**
> +
> +  * Added SSE/AVX2 Rx routines.
> +  * Updated Tx queue thresholds.
Please add a little more info to the Tx queue one.

Also, split the doc changes into the respective patches.


Re: [EXT] Libtpa: a DPDK based userspace TCP stack implementation

2023-12-11 Thread Thomas Monjalon
11/12/2023 12:32, Jerin Jacob Kollanukkaran:
> From: Yuanhan Liu 
> > Hi all,
> > 
> > I'd like to share a new DPDK open source project, libtpa(Transport Protocol
> > Acceleration)[0], which is just another userspace TCP stack implementation 
> > so
> > far, written from scratch.
> > 
> > I started this project 3 years ago, while I was searching for a feasible 
> > open
> > source project with no luck. There were indeed quite a few options, but 
> > none of
> > them actually met my needs. I then started writing one. Likely, there are 
> > still
> > other guys out there looking for a high performance and stable userspace TCP
> > stack. This is what this email and libtpa for.
> 
> Great Yuanhan.
> 
> If you have time and willing to put effort, I suggest make this part of dpdk 
> code base
> as new library (tcp or so) and leverage + improve another existing library 
> such ip_frag.
> 
> I believe, that is only way.
> - This code soon won't soon outdated based on new DPDK version
> - More community review and contributors
> - More review and features from NIC vendors PoV.
> - More arch and driver support.
> - More quality

As Yuanhan said, there are many TCP stacks running on top of DPDK.
We should add this one to the list:
https://www.dpdk.org/ecosystem/#projects
Also a discussion has started recently about integrating one in DPDK.
As Jerin suggests, libtpa looks like a very good candidate to focus efforts on.

Regarding performance, how does it compare with F-Stack? TLDK? Seastar?




Re: [EXT] Libtpa: a DPDK based userspace TCP stack implementation

2023-12-11 Thread Yuanhan Liu
On Mon, Dec 11, 2023 at 11:32:16AM +, Jerin Jacob Kollanukkaran wrote:
> 
> 
> > -Original Message-
> > From: Yuanhan Liu 
> > Sent: Monday, December 11, 2023 3:27 PM
> > To: lib...@googlegroups.com
> > Cc: dev@dpdk.org; Yuanhan Liu 
> > Subject: [EXT] Libtpa: a DPDK based userspace TCP stack implementation
> > 
> > External Email
> > 
> > --
> > Hi all,
> > 
> > I'd like to share a new DPDK open source project, libtpa(Transport Protocol
> > Acceleration)[0], which is just another userspace TCP stack implementation 
> > so
> > far, written from scratch.
> > 
> > I started this project 3 years ago, while I was searching for a feasible 
> > open
> > source project with no luck. There were indeed quite a few options, but 
> > none of
> > them actually met my needs. I then started writing one. Likely, there are 
> > still
> > other guys out there looking for a high performance and stable userspace TCP
> > stack. This is what this email and libtpa for.
> 
> Great Yuanhan.
> 
> If you have time and willing to put effort, I suggest make this part of dpdk 
> code base
> as new library (tcp or so) and leverage + improve another existing library 
> such ip_frag.
> 
> I believe, that is only way.
> - This code soon won't soon outdated based on new DPDK version
> - More community review and contributors
> - More review and features from NIC vendors PoV.
> - More arch and driver support.
> - More quality

Hi Jerin,

Thanks for your suggestion and these really are good points!

Although libtpa is currently designed as a library, I doubt it would suit
well as a new library to DPDK. Just taking the code base as an example,
libtpa so far is about 27K lines of code. The TCP part is only about
3K lines of code. All the rest are codes supporting the TCP part, such
as sock tracing, mem file, mem file auto archive, etc. You can look
more from the internals page (or even read the code ;)

   https://github.com/bytedance/libtpa/blob/main/doc/internals.rst

Thanks,
Yuanhan Liu


> 
> Just my 2c.
> 
> -- 
> You received this message because you are subscribed to the Google Groups 
> "libtpa" group.
> To unsubscribe from this group and stop receiving emails from it, send an 
> email to libtpa+unsubscr...@googlegroups.com.
> To view this discussion on the web visit 
> https://groups.google.com/d/msgid/libtpa/BY3PR18MB478592AF236C7BBDB0285E3CC88FA%40BY3PR18MB4785.namprd18.prod.outlook.com.
> For more options, visit https://groups.google.com/d/optout.


Re: [RFC v2 12/14] lib: convert to per line logging

2023-12-11 Thread David Marchand
On Fri, Dec 8, 2023 at 6:16 PM Stephen Hemminger
 wrote:
>
> On Fri,  8 Dec 2023 15:59:46 +0100
> David Marchand  wrote:
>
> > Convert many libraries that call RTE_LOG(... "\n", ...) to RTE_LOG_LINE.
> >
> > Note:
> > - for acl and sched libraries that still has some debug multilines
> >   messages, a direct call to RTE_LOG is used: this will make it easier to
> >   notice such special cases,
> >
> > Signed-off-by: David Marchand 
> > ---
> >  lib/acl/acl_bld.c   |  28 +--
> >  lib/acl/acl_gen.c   |   8 +-
> >  lib/acl/rte_acl.c   |   8 +-
> >  lib/acl/tb_mem.c|   4 +-
> >  lib/eal/common/eal_common_bus.c |  22 +-
> >  lib/eal/common/eal_common_class.c   |   4 +-
> >  lib/eal/common/eal_common_config.c  |   2 +-
> >  lib/eal/common/eal_common_debug.c   |   6 +-
> >  lib/eal/common/eal_common_dev.c |  80 +++
> >  lib/eal/common/eal_common_devargs.c |  18 +-
> >  lib/eal/common/eal_common_dynmem.c  |  34 +--
> >  lib/eal/common/eal_common_fbarray.c |  12 +-
> >  lib/eal/common/eal_common_interrupts.c  |  38 ++--
> >  lib/eal/common/eal_common_lcore.c   |  26 +--
> >  lib/eal/common/eal_common_memalloc.c|  12 +-
> >  lib/eal/common/eal_common_memory.c  |  66 +++---
> >  lib/eal/common/eal_common_memzone.c |  24 +--
> >  lib/eal/common/eal_common_options.c | 236 ++--
> >  lib/eal/common/eal_common_proc.c| 112 +-
> >  lib/eal/common/eal_common_tailqs.c  |  12 +-
> >  lib/eal/common/eal_common_thread.c  |  12 +-
> >  lib/eal/common/eal_common_timer.c   |   6 +-
> >  lib/eal/common/eal_common_trace_utils.c |   2 +-
> >  lib/eal/common/eal_trace.h  |   4 +-
> >  lib/eal/common/hotplug_mp.c |  54 ++---
> >  lib/eal/common/malloc_elem.c|   6 +-
> >  lib/eal/common/malloc_heap.c|  40 ++--
> >  lib/eal/common/malloc_mp.c  |  72 +++
> >  lib/eal/common/rte_keepalive.c  |   2 +-
> >  lib/eal/common/rte_malloc.c |  10 +-
> >  lib/eal/common/rte_service.c|   8 +-
> >  lib/eal/freebsd/eal.c   |  74 +++
> >  lib/eal/freebsd/eal_alarm.c |   2 +-
> >  lib/eal/freebsd/eal_dev.c   |   8 +-
> >  lib/eal/freebsd/eal_hugepage_info.c |  22 +-
> >  lib/eal/freebsd/eal_interrupts.c|  60 +++---
> >  lib/eal/freebsd/eal_lcore.c |   2 +-
> >  lib/eal/freebsd/eal_memalloc.c  |  10 +-
> >  lib/eal/freebsd/eal_memory.c|  34 +--
> >  lib/eal/freebsd/eal_thread.c|   2 +-
> >  lib/eal/freebsd/eal_timer.c |  10 +-
> >  lib/eal/linux/eal.c | 122 +--
> >  lib/eal/linux/eal_alarm.c   |   2 +-
> >  lib/eal/linux/eal_dev.c |  40 ++--
> >  lib/eal/linux/eal_hugepage_info.c   |  38 ++--
> >  lib/eal/linux/eal_interrupts.c  | 116 +-
> >  lib/eal/linux/eal_lcore.c   |   4 +-
> >  lib/eal/linux/eal_memalloc.c| 120 +--
> >  lib/eal/linux/eal_memory.c  | 208 +-
> >  lib/eal/linux/eal_thread.c  |   4 +-
> >  lib/eal/linux/eal_timer.c   |  10 +-
> >  lib/eal/linux/eal_vfio.c| 270 +++
> >  lib/eal/linux/eal_vfio_mp_sync.c|   4 +-
> >  lib/eal/riscv/rte_cycles.c  |   4 +-
> >  lib/eal/unix/eal_filesystem.c   |  14 +-
> >  lib/eal/unix/eal_firmware.c |   2 +-
> >  lib/eal/unix/eal_unix_memory.c  |   8 +-
> >  lib/eal/unix/rte_thread.c   |  34 +--
> >  lib/eal/windows/eal.c   |  36 ++--
> >  lib/eal/windows/eal_alarm.c |  12 +-
> >  lib/eal/windows/eal_debug.c |   8 +-
> >  lib/eal/windows/eal_dev.c   |   8 +-
> >  lib/eal/windows/eal_hugepages.c |  10 +-
> >  lib/eal/windows/eal_interrupts.c|  10 +-
> >  lib/eal/windows/eal_lcore.c |   6 +-
> >  lib/eal/windows/eal_memalloc.c  |  50 ++---
> >  lib/eal/windows/eal_memory.c|  20 +-
> >  lib/eal/windows/eal_windows.h   |   4 +-
> >  lib/eal/windows/include/rte_windows.h   |   4 +-
> >  lib/eal/windows/rte_thread.c|  28 +--
> >  lib/efd/rte_efd.c   |  58 ++---
> >  lib/fib/rte_fib.c   |  14 +-
> >  lib/fib/rte_fib6.c  |  14 +-
> >  lib/hash/rte_cuckoo_hash.c  |  52 ++---
> >  lib/hash/rte_fbk_hash.c |   4 +-
> >  lib/hash/rte_hash_crc.c |  12 +-
> >  lib/hash/rte_thash.c|  20 +-
> >  lib/hash/rte_thash_gfni.c   |   8 +-
> >  lib/ip_frag/rte_ip_frag_common.c|   8 +-
> >  lib/latencystats/rte_latencystats.c |  41 ++--
> >  lib/log/log.c   |   6 +-
> >  lib/lpm/rte_lpm.c 

Re: [EXT] Libtpa: a DPDK based userspace TCP stack implementation

2023-12-11 Thread Yuanhan Liu
On Mon, Dec 11, 2023 at 01:16:52PM +0100, Thomas Monjalon wrote:
> 11/12/2023 12:32, Jerin Jacob Kollanukkaran:
> > From: Yuanhan Liu 
> > > Hi all,
> > > 
> > > I'd like to share a new DPDK open source project, libtpa(Transport 
> > > Protocol
> > > Acceleration)[0], which is just another userspace TCP stack 
> > > implementation so
> > > far, written from scratch.
> > > 
> > > I started this project 3 years ago, while I was searching for a feasible 
> > > open
> > > source project with no luck. There were indeed quite a few options, but 
> > > none of
> > > them actually met my needs. I then started writing one. Likely, there are 
> > > still
> > > other guys out there looking for a high performance and stable userspace 
> > > TCP
> > > stack. This is what this email and libtpa for.
> > 
> > Great Yuanhan.
> > 
> > If you have time and willing to put effort, I suggest make this part of 
> > dpdk code base
> > as new library (tcp or so) and leverage + improve another existing library 
> > such ip_frag.
> > 
> > I believe, that is only way.
> > - This code soon won't soon outdated based on new DPDK version
> > - More community review and contributors
> > - More review and features from NIC vendors PoV.
> > - More arch and driver support.
> > - More quality
> 
> As Yuanhan said, there are many TCP stacks running on top of DPDK.
> We should add this one to the list:
>   https://www.dpdk.org/ecosystem/#projects
> Also a discussion has started recently about integrating one in DPDK.
> As Jerin suggests, libtpa looks like a very good candidate to focus efforts 
> on it.
> 
> Regarding performance, how does it compare with F-Stack? TLDK? Seastar?

I think it should be fair to say (I haven't done the testing though; I
never tried to run those stacks), libtpa is the userspace tcp stack with
the best performance I'm aware of. The redis numbers shown in this email
thread are just one example. Libtpa also ships a performance benchmark,
tperf. With tperf write test (and without jumboframe), libtpa can achieve
200G linerate with only one physical core for write. The read test is not
that good though, because of missing hardware acceleration features like
TSO.

Although performance is very important to a userspace stack, I still
want to point out that, during the design, performance was not my major
goal. I spent most of my effort on shaping the testing system and the
debuggability initially. Libtpa has been deployed in bytedance for more
than two years; so far, not a single TCP protocol bug has been reported.
(I do get very few bugs reported though, but most of them are related
to the OS environment, such as sigbus due to wrong API used when running
out of tmpfs).

Thanks,
Yuanhan Liu


Re: [RFC v2 13/14] lib: replace logging helpers

2023-12-11 Thread David Marchand
On Fri, Dec 8, 2023 at 6:18 PM Stephen Hemminger
 wrote:
>
> On Fri,  8 Dec 2023 15:59:47 +0100
> David Marchand  wrote:
>
> > diff --git a/lib/bpf/bpf_impl.h b/lib/bpf/bpf_impl.h
> > index b483569071..30d83d2b40 100644
> > --- a/lib/bpf/bpf_impl.h
> > +++ b/lib/bpf/bpf_impl.h
> > @@ -27,9 +27,10 @@ int __rte_bpf_jit_x86(struct rte_bpf *bpf);
> >  int __rte_bpf_jit_arm64(struct rte_bpf *bpf);
> >
> >  extern int rte_bpf_logtype;
> > +#define RTE_LOGTYPE_BPF rte_bpf_logtype
> >
> > -#define  RTE_BPF_LOG(lvl, fmt, args...) \
> > - rte_log(RTE_LOG_## lvl, rte_bpf_logtype, fmt, ##args)
> > +#define  BPF_LOG(lvl, fmt, args...) \
> > + RTE_LOG(lvl, BPF, fmt "\n", ##args)
>
> Not sure about this. There were some cases where bpf_XXX function
> names clashed with those in libpcap. That is probably why the
> RTE_BPF_LOG was chosen.
>

That would only impact DPDK compilation as it is an internal header,
but I get your point.
I put a note to update in a next revision.


-- 
David Marchand



Re: [RFC v2 04/14] lib: add newline in logs

2023-12-11 Thread David Marchand
On Fri, Dec 8, 2023 at 6:02 PM Stephen Hemminger
 wrote:
>
> On Fri,  8 Dec 2023 15:59:38 +0100
> David Marchand  wrote:
>
> > Fix places leading to a log message not terminated with a newline.
> >
> > Cc: sta...@dpdk.org
> >
> > Signed-off-by: David Marchand 
>
> Maybe a coccinelle fixup script would help in future.

Checkpatch will now complain for new users of RTE_LOG().
So hopefully, using RTE_LOG() will be an exception, rather than a
normal occurrence.


-- 
David Marchand



Re: [v2] net/af_xdp: enable a sock path alongside use_cni

2023-12-11 Thread Maryam Tahhan

On 08/12/2023 18:10, Maryam Tahhan wrote:

Thanks Stephen,  I will have a look.

I've seen a few places mention that abstract sockets are attached to 
the network namespace of a process. For our use case the 2 processes 
(pods) will have separate network namespaces. So I'm not sure it will 
work. However, it should be easy to validate and I can give it a try 
in a k8s environment for completeness. Otherwise all the pods would 
need to be host networked which is not what we want at all.


I was able to find a case where abstract sockets were used by 
containerd (CVE-2020-15257) [1]. Our AF_XDP DP Pod is also host 
networked and so it seems that we would be opening ourselves up to 
similar issues,  in that a bad acting container could block containers 
that actually want to use afxdp_dp by simply connecting to the DP and 
just failing to handshake on all the abstract sockets it finds in the 
host namespace.


I will circle back on Mon re the first open, but considering that 
containerd abandoned this approach, I'm not sure it's the way to go 
for us. But let's cross that bridge after we have an answer to the 
first issue.



Hi Stephen

Circling back, I built a simple example here [1] using kind. The 
abstract sockets don't work across network namespaces (which is our 
scenario with the Pods) and so will not be usable for what we are trying 
to do here.


The example creates a simple kind cluster. It builds a simple docker 
image that incorporates socat. Then it launches 2 pods:


- The first pod is the server (it will use socat to create an abstract 
socket).


- The second pod is the client (it will use socat to try to connect to 
the abstract socket).



The connection attempt in the client fails.


[1] https://github.com/maryamtahhan/ans-kind-example



[PATCH v2 1/3] common/cnxk: update timer base code

2023-12-11 Thread pbhagavatula
From: Pavan Nikhilesh 

Update event timer base code.

Signed-off-by: Pavan Nikhilesh 
---
v2 Changes:
- Split patches.

 drivers/common/cnxk/hw/tim.h   |  5 +++--
 drivers/common/cnxk/roc_mbox.h | 11 +++
 drivers/common/cnxk/roc_sso.c  |  2 +-
 drivers/common/cnxk/roc_tim.c  | 27 ++-
 drivers/common/cnxk/roc_tim.h  |  3 +++
 5 files changed, 44 insertions(+), 4 deletions(-)

diff --git a/drivers/common/cnxk/hw/tim.h b/drivers/common/cnxk/hw/tim.h
index 61c38ae175..82b094e3dc 100644
--- a/drivers/common/cnxk/hw/tim.h
+++ b/drivers/common/cnxk/hw/tim.h
@@ -49,7 +49,8 @@
 #define TIM_LF_RING_REL   (0x400)

 #define TIM_MAX_INTERVAL_TICKS ((1ULL << 32) - 1)
-#define TIM_MAX_BUCKET_SIZE((1ULL << 20) - 1)
-#define TIM_MIN_BUCKET_SIZE3
+#define TIM_MAX_BUCKET_SIZE((1ULL << 20) - 2)
+#define TIM_MIN_BUCKET_SIZE1
+#define TIM_BUCKET_WRAP_SIZE   3

 #endif /* __TIM_HW_H__ */
diff --git a/drivers/common/cnxk/roc_mbox.h b/drivers/common/cnxk/roc_mbox.h
index 05434aec5a..4590e5f2dd 100644
--- a/drivers/common/cnxk/roc_mbox.h
+++ b/drivers/common/cnxk/roc_mbox.h
@@ -154,6 +154,8 @@ struct mbox_msghdr {
M(TIM_DISABLE_RING, 0x804, tim_disable_ring, tim_ring_req, msg_rsp)\
M(TIM_GET_MIN_INTVL, 0x805, tim_get_min_intvl, tim_intvl_req,  \
  tim_intvl_rsp)   \
+   M(TIM_CAPTURE_COUNTERS, 0x806, tim_capture_counters, msg_req,  \
+ tim_capture_rsp) \
/* CPT mbox IDs (range 0xA00 - 0xBFF) */   \
M(CPT_LF_ALLOC, 0xA00, cpt_lf_alloc, cpt_lf_alloc_req_msg, msg_rsp)\
M(CPT_LF_FREE, 0xA01, cpt_lf_free, msg_req, msg_rsp)   \
@@ -2541,6 +2543,10 @@ enum tim_clk_srcs {
TIM_CLK_SRCS_GPIO = 1,
TIM_CLK_SRCS_GTI = 2,
TIM_CLK_SRCS_PTP = 3,
+   TIM_CLK_SRCS_SYNCE = 4,
+   TIM_CLK_SRCS_BTS = 5,
+   TIM_CLK_SRCS_EXT_MIO = 6,
+   TIM_CLK_SRCS_EXT_GTI = 7,
TIM_CLK_SRSC_INVALID,
 };

@@ -2652,6 +2658,11 @@ struct tim_intvl_rsp {
uint64_t __io intvl_ns;
 };

+struct tim_capture_rsp {
+   struct mbox_msghdr hdr;
+   uint64_t __io counters[TIM_CLK_SRSC_INVALID];
+};
+
 struct sdp_node_info {
/* Node to which this PF belons to */
uint8_t __io node_id;
diff --git a/drivers/common/cnxk/roc_sso.c b/drivers/common/cnxk/roc_sso.c
index 748d287bad..f09b535c80 100644
--- a/drivers/common/cnxk/roc_sso.c
+++ b/drivers/common/cnxk/roc_sso.c
@@ -891,7 +891,7 @@ roc_sso_rsrc_init(struct roc_sso *roc_sso, uint8_t nb_hws, 
uint16_t nb_hwgrp, ui
goto sso_msix_fail;
}

-   nb_tim_lfs = nb_tim_lfs ? PLT_MIN(nb_tim_lfs, free_tim_lfs) : 
free_tim_lfs;
+   nb_tim_lfs = PLT_MIN(nb_tim_lfs, free_tim_lfs);
}

/* 2 error interrupt per TIM LF */
diff --git a/drivers/common/cnxk/roc_tim.c b/drivers/common/cnxk/roc_tim.c
index f8607b2852..095afbb9e6 100644
--- a/drivers/common/cnxk/roc_tim.c
+++ b/drivers/common/cnxk/roc_tim.c
@@ -91,6 +91,31 @@ tim_err_desc(int rc)
}
 }

+int
+roc_tim_capture_counters(struct roc_tim *roc_tim, uint64_t *counters, uint8_t 
nb_cntrs)
+{
+   struct sso *sso = roc_sso_to_sso_priv(roc_tim->roc_sso);
+   struct dev *dev = &sso->dev;
+   struct mbox *mbox = mbox_get(dev->mbox);
+   struct tim_capture_rsp *rsp;
+   int rc, i;
+
+   mbox_alloc_msg_tim_capture_counters(mbox);
+   rc = mbox_process_msg(dev->mbox, (void **)&rsp);
+   if (rc) {
+   tim_err_desc(rc);
+   rc = -EIO;
+   goto fail;
+   }
+
+   for (i = 0; i < nb_cntrs; i++)
+   counters[i] = rsp->counters[i];
+
+fail:
+   mbox_put(mbox);
+   return rc;
+}
+
 int
 roc_tim_lf_enable(struct roc_tim *roc_tim, uint8_t ring_id, uint64_t 
*start_tsc,
  uint32_t *cur_bkt)
@@ -138,7 +163,7 @@ roc_tim_lf_disable(struct roc_tim *roc_tim, uint8_t ring_id)
goto fail;
req->ring = ring_id;

-   rc = mbox_process(dev->mbox);
+   rc = mbox_process(mbox);
if (rc) {
tim_err_desc(rc);
rc = -EIO;
diff --git a/drivers/common/cnxk/roc_tim.h b/drivers/common/cnxk/roc_tim.h
index 7dc9ae0a61..f9a9ad1887 100644
--- a/drivers/common/cnxk/roc_tim.h
+++ b/drivers/common/cnxk/roc_tim.h
@@ -14,6 +14,8 @@ enum roc_tim_clk_src {
ROC_TIM_CLK_SRC_PTP,
ROC_TIM_CLK_SRC_SYNCE,
ROC_TIM_CLK_SRC_BTS,
+   ROC_TIM_CLK_SRC_EXT_MIO,
+   ROC_TIM_CLK_SRC_EXT_GTI,
ROC_TIM_CLK_SRC_INVALID,
 };

@@ -48,5 +50,6 @@ int __roc_api roc_tim_lf_alloc(struct roc_tim *roc_tim, 
uint8_t ring_id,
 int __roc_api roc_tim_lf_free(struct roc_tim *roc_tim, uint8_t ring_id);
 uintptr_t __roc_api roc_tim_lf_base_get(struct roc_tim *roc_tim,
uint8

[PATCH v2 2/3] common/cnxk: update scheduler base code

2023-12-11 Thread pbhagavatula
From: Pavan Nikhilesh 

Update event scheduler base code.

Signed-off-by: Pavan Nikhilesh 
---
 drivers/common/cnxk/hw/ssow.h |  4 
 drivers/common/cnxk/roc_sso.c | 34 +++---
 drivers/common/cnxk/roc_sso.h |  2 +-
 3 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/drivers/common/cnxk/hw/ssow.h b/drivers/common/cnxk/hw/ssow.h
index 618ab7973b..c146a8c3ef 100644
--- a/drivers/common/cnxk/hw/ssow.h
+++ b/drivers/common/cnxk/hw/ssow.h
@@ -54,6 +54,8 @@
 #define SSOW_LF_GWS_OP_SWTAG_FULL1   (0xc28ull)
 #define SSOW_LF_GWS_OP_GWC_INVAL (0xe00ull)
 
+#define SSOW_LF_GWS_MAX_NW_TIM_US (0x400) /* [CN9K, CN10K) */
+
 /* Enum offsets */
 
 #define SSOW_LF_INT_VEC_IOP (0x0ull)
@@ -65,6 +67,8 @@
 #define SSOW_LF_GWS_TAG_PEND_GET_WORK_BIT 63
 #define SSOW_LF_GWS_TAG_PEND_SWITCH_BIT  62
 #define SSOW_LF_GWS_TAG_PEND_DESCHED_BIT  58
+#define SSOW_LF_GWS_TAG_PEND_FLUSH   56
+#define SSOW_LF_GWS_TAG_PEND_SWUNT   54
 #define SSOW_LF_GWS_TAG_HEAD_BIT 35
 
 #endif /* __SSOW_HW_H__ */
diff --git a/drivers/common/cnxk/roc_sso.c b/drivers/common/cnxk/roc_sso.c
index f09b535c80..e5c16b2a05 100644
--- a/drivers/common/cnxk/roc_sso.c
+++ b/drivers/common/cnxk/roc_sso.c
@@ -17,6 +17,11 @@ sso_lf_alloc(struct dev *dev, enum sso_lf_type lf_type, 
uint16_t nb_lf,
struct mbox *mbox = mbox_get(dev->mbox);
int rc = -ENOSPC;
 
+   if (!nb_lf) {
+   mbox_put(mbox);
+   return 0;
+   }
+
switch (lf_type) {
case SSO_LF_TYPE_HWS: {
struct ssow_lf_alloc_req *req;
@@ -56,6 +61,11 @@ sso_lf_free(struct dev *dev, enum sso_lf_type lf_type, 
uint16_t nb_lf)
struct mbox *mbox = mbox_get(dev->mbox);
int rc = -ENOSPC;
 
+   if (!nb_lf) {
+   mbox_put(mbox);
+   return 0;
+   }
+
switch (lf_type) {
case SSO_LF_TYPE_HWS: {
struct ssow_lf_free_req *req;
@@ -98,6 +108,11 @@ sso_rsrc_attach(struct roc_sso *roc_sso, enum sso_lf_type 
lf_type,
struct rsrc_attach_req *req;
int rc = -ENOSPC;
 
+   if (!nb_lf) {
+   mbox_put(mbox);
+   return 0;
+   }
+
req = mbox_alloc_msg_attach_resources(mbox);
if (req == NULL)
goto exit;
@@ -264,13 +279,10 @@ roc_sso_hwgrp_base_get(struct roc_sso *roc_sso, uint16_t 
hwgrp)
 }
 
 uint64_t
-roc_sso_ns_to_gw(struct roc_sso *roc_sso, uint64_t ns)
+roc_sso_ns_to_gw(uint64_t base, uint64_t ns)
 {
-   struct dev *dev = &roc_sso_to_sso_priv(roc_sso)->dev;
-   uint64_t current_us, current_ns, new_ns;
-   uintptr_t base;
+   uint64_t current_us;
 
-   base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20);
current_us = plt_read64(base + SSOW_LF_GWS_NW_TIM);
/* From HRM, table 14-19:
 * The SSOW_LF_GWS_NW_TIM[NW_TIM] period is specified in n-1 notation.
@@ -279,14 +291,11 @@ roc_sso_ns_to_gw(struct roc_sso *roc_sso, uint64_t ns)
 
/* From HRM, table 14-1:
 * SSOW_LF_GWS_NW_TIM[NW_TIM] specifies the minimum timeout. The SSO
-* hardware times out a GET_WORK request within 2 usec of the minimum
+* hardware times out a GET_WORK request within 1 usec of the minimum
 * timeout specified by SSOW_LF_GWS_NW_TIM[NW_TIM].
 */
-   current_us += 2;
-   current_ns = current_us * 1E3;
-   new_ns = (ns - PLT_MIN(ns, current_ns));
-   new_ns = !new_ns ? 1 : new_ns;
-   return (new_ns * plt_tsc_hz()) / 1E9;
+   current_us += 1;
+   return PLT_MAX(1UL, (uint64_t)PLT_DIV_CEIL(ns, (current_us * 1E3)));
 }
 
 int
@@ -705,6 +714,9 @@ roc_sso_hwgrp_release_xaq(struct roc_sso *roc_sso, uint16_t 
hwgrps)
struct dev *dev = &sso->dev;
int rc;
 
+   if (!hwgrps)
+   return 0;
+
rc = sso_hwgrp_release_xaq(dev, hwgrps);
return rc;
 }
diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h
index 64f14b8119..26061f25f8 100644
--- a/drivers/common/cnxk/roc_sso.h
+++ b/drivers/common/cnxk/roc_sso.h
@@ -83,7 +83,7 @@ int __roc_api roc_sso_hwgrp_release_xaq(struct roc_sso 
*roc_sso,
 int __roc_api roc_sso_hwgrp_set_priority(struct roc_sso *roc_sso,
 uint16_t hwgrp, uint8_t weight,
 uint8_t affinity, uint8_t priority);
-uint64_t __roc_api roc_sso_ns_to_gw(struct roc_sso *roc_sso, uint64_t ns);
+uint64_t __roc_api roc_sso_ns_to_gw(uint64_t base, uint64_t ns);
 int __roc_api roc_sso_hws_link(struct roc_sso *roc_sso, uint8_t hws, uint16_t 
hwgrp[],
   uint16_t nb_hwgrp, uint8_t set);
 int __roc_api roc_sso_hws_unlink(struct roc_sso *roc_sso, uint8_t hws, 
uint16_t hwgrp[],
-- 
2.25.1



[PATCH v2 3/3] event/cnxk: add option to update links via mbox

2023-12-11 Thread pbhagavatula
From: Pavan Nikhilesh 

Add option to update event queue to event port links via
mailbox.

Signed-off-by: Pavan Nikhilesh 
---
 drivers/common/cnxk/roc_sso.c   | 87 +
 drivers/common/cnxk/roc_sso.h   |  4 +-
 drivers/event/cnxk/cn10k_eventdev.c |  6 +-
 drivers/event/cnxk/cn9k_eventdev.c  | 18 +++---
 4 files changed, 90 insertions(+), 25 deletions(-)

diff --git a/drivers/common/cnxk/roc_sso.c b/drivers/common/cnxk/roc_sso.c
index e5c16b2a05..293b0c81a1 100644
--- a/drivers/common/cnxk/roc_sso.c
+++ b/drivers/common/cnxk/roc_sso.c
@@ -235,6 +235,47 @@ sso_hws_link_modify(uint8_t hws, uintptr_t base, struct 
plt_bitmap *bmp, uint16_
}
 }
 
+static int
+sso_hws_link_modify_af(struct dev *dev, uint8_t hws, struct plt_bitmap *bmp, 
uint16_t hwgrp[],
+  uint16_t n, uint8_t set, uint16_t enable)
+{
+   struct mbox *mbox = mbox_get(dev->mbox);
+   struct ssow_chng_mship *req;
+   int rc, i;
+
+   req = mbox_alloc_msg_ssow_chng_mship(mbox);
+   if (req == NULL) {
+   rc = mbox_process(mbox);
+   if (rc) {
+   mbox_put(mbox);
+   return -EIO;
+   }
+   req = mbox_alloc_msg_ssow_chng_mship(mbox);
+   if (req == NULL) {
+   mbox_put(mbox);
+   return -ENOSPC;
+   }
+   }
+   req->enable = enable;
+   req->set = set;
+   req->hws = hws;
+   req->nb_hwgrps = n;
+   for (i = 0; i < n; i++)
+   req->hwgrps[i] = hwgrp[i];
+   rc = mbox_process(mbox);
+   mbox_put(mbox);
+   if (rc == MBOX_MSG_INVALID)
+   return rc;
+   if (rc)
+   return -EIO;
+
+   for (i = 0; i < n; i++)
+   enable ? plt_bitmap_set(bmp, hwgrp[i]) :
+plt_bitmap_clear(bmp, hwgrp[i]);
+
+   return 0;
+}
+
 static int
 sso_msix_fill(struct roc_sso *roc_sso, uint16_t nb_hws, uint16_t nb_hwgrp)
 {
@@ -300,31 +341,55 @@ roc_sso_ns_to_gw(uint64_t base, uint64_t ns)
 
 int
 roc_sso_hws_link(struct roc_sso *roc_sso, uint8_t hws, uint16_t hwgrp[], 
uint16_t nb_hwgrp,
-uint8_t set)
+uint8_t set, bool use_mbox)
 {
-   struct dev *dev = &roc_sso_to_sso_priv(roc_sso)->dev;
-   struct sso *sso;
+   struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+   struct dev *dev = &sso->dev;
uintptr_t base;
+   int rc;
 
-   sso = roc_sso_to_sso_priv(roc_sso);
+   if (!nb_hwgrp)
+   return 0;
+
+   if (use_mbox && roc_model_is_cn10k()) {
+   rc = sso_hws_link_modify_af(dev, hws, sso->link_map[hws], 
hwgrp, nb_hwgrp, set, 1);
+   if (rc == MBOX_MSG_INVALID)
+   goto lf_access;
+   if (rc < 0)
+   return 0;
+   goto done;
+   }
+lf_access:
base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | hws << 12);
sso_hws_link_modify(hws, base, sso->link_map[hws], hwgrp, nb_hwgrp, 
set, 1);
-
+done:
return nb_hwgrp;
 }
 
 int
-roc_sso_hws_unlink(struct roc_sso *roc_sso, uint8_t hws, uint16_t hwgrp[], 
uint16_t nb_hwgrp,
-  uint8_t set)
+roc_sso_hws_unlink(struct roc_sso *roc_sso, uint8_t hws, uint16_t hwgrp[],
+  uint16_t nb_hwgrp, uint8_t set, bool use_mbox)
 {
-   struct dev *dev = &roc_sso_to_sso_priv(roc_sso)->dev;
-   struct sso *sso;
+   struct sso *sso = roc_sso_to_sso_priv(roc_sso);
+   struct dev *dev = &sso->dev;
uintptr_t base;
+   int rc;
 
-   sso = roc_sso_to_sso_priv(roc_sso);
+   if (!nb_hwgrp)
+   return 0;
+
+   if (use_mbox && roc_model_is_cn10k()) {
+   rc = sso_hws_link_modify_af(dev, hws, sso->link_map[hws], 
hwgrp, nb_hwgrp, set, 0);
+   if (rc == MBOX_MSG_INVALID)
+   goto lf_access;
+   if (rc < 0)
+   return 0;
+   goto done;
+   }
+lf_access:
base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | hws << 12);
sso_hws_link_modify(hws, base, sso->link_map[hws], hwgrp, nb_hwgrp, 
set, 0);
-
+done:
return nb_hwgrp;
 }
 
diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h
index 26061f25f8..4ac901762e 100644
--- a/drivers/common/cnxk/roc_sso.h
+++ b/drivers/common/cnxk/roc_sso.h
@@ -85,9 +85,9 @@ int __roc_api roc_sso_hwgrp_set_priority(struct roc_sso 
*roc_sso,
 uint8_t affinity, uint8_t priority);
 uint64_t __roc_api roc_sso_ns_to_gw(uint64_t base, uint64_t ns);
 int __roc_api roc_sso_hws_link(struct roc_sso *roc_sso, uint8_t hws, uint16_t 
hwgrp[],
-  uint16_t nb_hwgrp, uint8_t set);
+  uint16_t nb_hwgrp, uint8_t set, bool use_mbox);
 int __roc_api roc_sso_hws_unlink(struct roc_sso *roc_sso, uint8_t hws, 
uint16_t hwgrp[],
-

Re: [EXT] Libtpa: a DPDK based userspace TCP stack implementation

2023-12-11 Thread Jerin Jacob
On Mon, Dec 11, 2023 at 5:48 PM Yuanhan Liu  wrote:
>
> On Mon, Dec 11, 2023 at 11:32:16AM +, Jerin Jacob Kollanukkaran wrote:
> >
> >
> > > -Original Message-
> > > From: Yuanhan Liu 
> > > Sent: Monday, December 11, 2023 3:27 PM
> > > To: lib...@googlegroups.com
> > > Cc: dev@dpdk.org; Yuanhan Liu 
> > > Subject: [EXT] Libtpa: a DPDK based userspace TCP stack implementation
> > >
> > > External Email
> > >
> > > --
> > > Hi all,
> > >
> > > I'd like to share a new DPDK open source project, libtpa(Transport 
> > > Protocol
> > > Acceleration)[0], which is just another userspace TCP stack 
> > > implementation so
> > > far, written from scratch.
> > >
> > > I started this project 3 years ago, while I was searching for a feasible 
> > > open
> > > source project with no luck. There were indeed quite a few options, but 
> > > none of
> > > them actually met my needs. I then started writing one. Likely, there are 
> > > still
> > > other guys out there looking for a high performance and stable userspace 
> > > TCP
> > > stack. This is what this email and libtpa are for.
> >
> > Great Yuanhan.
> >
> > If you have the time and are willing to put in the effort, I suggest making this part of 
> > the DPDK code base
> > as a new library (tcp or so) and leveraging + improving other existing libraries 
> > such as ip_frag.
> >
> > I believe that is the only way.
> > - This code won't soon become outdated with new DPDK versions
> > - More community review and contributors
> > - More review and features from NIC vendors PoV.
> > - More arch and driver support.
> > - More quality
>
> Hi Jerin,
>
> Thanks for your suggestion and these really are good points!
>
> Although libtpa is currently designed as a library, I doubt it would suit
> well as a new library to DPDK. Just taking the code base as an example,

I think the number of lines won't be a concern for upstreaming.

> libtpa so far is about 27K lines of code. The TCP part is only about
> 3K lines of code. All the rest are codes supporting the TCP part, such
> as sock tracing, mem file, mem file auto archive, etc. You can look

I think the key piece would be to split the code into reusable libraries (like
mem file) and leverage
existing libraries like eal trace.

DPDK has standardized the new library addition process so that it does not
involve a lot of throwaway code.
See https://doc.dpdk.org/guides/contributing/new_library.html

> more from the internals page (or even read the code ;)
>
>https://github.com/bytedance/libtpa/blob/main/doc/internals.rst
>
> Thanks,
> Yuanhan Liu
>
>
> >
> > Just my 2c.
> >
> > --
> > You received this message because you are subscribed to the Google Groups 
> > "libtpa" group.
> > To unsubscribe from this group and stop receiving emails from it, send an 
> > email to libtpa+unsubscr...@googlegroups.com.
> > To view this discussion on the web visit 
> > https://groups.google.com/d/msgid/libtpa/BY3PR18MB478592AF236C7BBDB0285E3CC88FA%40BY3PR18MB4785.namprd18.prod.outlook.com.
> > For more options, visit https://groups.google.com/d/optout.


[PATCH v5 1/3] net/octeon_ep: optimize Rx and Tx routines

2023-12-11 Thread pbhagavatula
From: Pavan Nikhilesh 

Preset rearm data to avoid writing multiple fields in fastpath.
Increase maximum outstanding Tx instructions from 128 to 256.

Signed-off-by: Pavan Nikhilesh 
---
v5 Changes:
- Make release notes more verbose.
v4 Changes:
- Fix checkpatch.
- Update release notes.
v3 Changes:
- Add more comments to the code.
- Re-enable 32b build to prevent ABI break.
v2 Changes:
- Skip compiling for 32b x86 targets.

 doc/guides/rel_notes/release_24_03.rst |  5 +
 drivers/net/octeon_ep/cnxk_ep_rx.c | 12 
 drivers/net/octeon_ep/otx_ep_common.h  |  3 +++
 drivers/net/octeon_ep/otx_ep_rxtx.c| 27 ++
 drivers/net/octeon_ep/otx_ep_rxtx.h|  2 +-
 5 files changed, 44 insertions(+), 5 deletions(-)

diff --git a/doc/guides/rel_notes/release_24_03.rst 
b/doc/guides/rel_notes/release_24_03.rst
index 6f8ad27808..2265814c55 100644
--- a/doc/guides/rel_notes/release_24_03.rst
+++ b/doc/guides/rel_notes/release_24_03.rst
@@ -55,6 +55,11 @@ New Features
  Also, make sure to start the actual text at the margin.
  ===

+* **Updated Marvell Octeon ep driver.**
+
+  * Optimize mbuf rearm sequence.
+  * Updated Tx queue mbuf free thresholds from 128 to 256 for better 
performance.
+

 Removed Items
 -
diff --git a/drivers/net/octeon_ep/cnxk_ep_rx.c 
b/drivers/net/octeon_ep/cnxk_ep_rx.c
index 74f0011283..75bb7225d2 100644
--- a/drivers/net/octeon_ep/cnxk_ep_rx.c
+++ b/drivers/net/octeon_ep/cnxk_ep_rx.c
@@ -93,7 +93,7 @@ cnxk_ep_check_rx_pkts(struct otx_ep_droq *droq)
new_pkts = val - droq->pkts_sent_ism_prev;
droq->pkts_sent_ism_prev = val;

-   if (val > (uint32_t)(1 << 31)) {
+   if (val > RTE_BIT32(31)) {
/* Only subtract the packet count in the HW counter
 * when count above halfway to saturation.
 */
@@ -128,7 +128,6 @@ cnxk_ep_process_pkts_scalar(struct rte_mbuf **rx_pkts, 
struct otx_ep_droq *droq,
 {
struct rte_mbuf **recv_buf_list = droq->recv_buf_list;
uint32_t bytes_rsvd = 0, read_idx = droq->read_idx;
-   uint16_t port_id = droq->otx_ep_dev->port_id;
uint16_t nb_desc = droq->nb_desc;
uint16_t pkts;

@@ -137,14 +136,19 @@ cnxk_ep_process_pkts_scalar(struct rte_mbuf **rx_pkts, 
struct otx_ep_droq *droq,
struct rte_mbuf *mbuf;
uint16_t pkt_len;

+   rte_prefetch0(recv_buf_list[otx_ep_incr_index(read_idx, 2, 
nb_desc)]);
+   
rte_prefetch0(rte_pktmbuf_mtod(recv_buf_list[otx_ep_incr_index(read_idx,
+  
2, nb_desc)],
+ void *));
+
mbuf = recv_buf_list[read_idx];
info = rte_pktmbuf_mtod(mbuf, struct otx_ep_droq_info *);
read_idx = otx_ep_incr_index(read_idx, 1, nb_desc);
pkt_len = rte_bswap16(info->length >> 48);
-   mbuf->data_off += OTX_EP_INFO_SIZE;
mbuf->pkt_len = pkt_len;
mbuf->data_len = pkt_len;
-   mbuf->port = port_id;
+
+   *(uint64_t *)&mbuf->rearm_data = droq->rearm_data;
rx_pkts[pkts] = mbuf;
bytes_rsvd += pkt_len;
}
diff --git a/drivers/net/octeon_ep/otx_ep_common.h 
b/drivers/net/octeon_ep/otx_ep_common.h
index 82e57520d3..299b5122d8 100644
--- a/drivers/net/octeon_ep/otx_ep_common.h
+++ b/drivers/net/octeon_ep/otx_ep_common.h
@@ -365,6 +365,9 @@ struct otx_ep_droq {
/* receive buffer list contains mbuf ptr list */
struct rte_mbuf **recv_buf_list;

+   /* Packet re-arm data. */
+   uint64_t rearm_data;
+
/* Packets pending to be processed */
uint64_t pkts_pending;

diff --git a/drivers/net/octeon_ep/otx_ep_rxtx.c 
b/drivers/net/octeon_ep/otx_ep_rxtx.c
index c421ef0a1c..40c4a16a38 100644
--- a/drivers/net/octeon_ep/otx_ep_rxtx.c
+++ b/drivers/net/octeon_ep/otx_ep_rxtx.c
@@ -284,6 +284,32 @@ otx_ep_droq_setup_ring_buffers(struct otx_ep_droq *droq)
return 0;
 }

+static inline uint64_t
+otx_ep_set_rearm_data(struct otx_ep_device *otx_ep)
+{
+   uint16_t port_id = otx_ep->port_id;
+   struct rte_mbuf mb_def;
+   uint64_t *tmp;
+
+   RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_off) % 8 != 0);
+   RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, refcnt) - offsetof(struct 
rte_mbuf, data_off) !=
+2);
+   RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, nb_segs) - offsetof(struct 
rte_mbuf, data_off) !=
+4);
+   RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, port) - offsetof(struct 
rte_mbuf, data_off) !=
+6);
+   mb_def.nb_segs = 1;
+   mb_def.data_off = RTE_PKTMBUF_HEADROOM + OTX_EP_INFO_SIZE;
+   mb_def.port = port_id;
+   rte_mbuf_refcnt_set(&mb_def, 1);
+
+   /* Prevent compiler reordering: rearm_da

[PATCH v5 2/3] net/octeon_ep: use SSE instructions for Rx routine

2023-12-11 Thread pbhagavatula
From: Pavan Nikhilesh 

Optimize Rx routine to use SSE instructions.

Signed-off-by: Pavan Nikhilesh 
---
 doc/guides/rel_notes/release_24_03.rst |   1 +
 drivers/net/octeon_ep/cnxk_ep_rx.c | 159 +--
 drivers/net/octeon_ep/cnxk_ep_rx.h | 167 +
 drivers/net/octeon_ep/cnxk_ep_rx_sse.c | 130 +++
 drivers/net/octeon_ep/meson.build  |  11 ++
 drivers/net/octeon_ep/otx_ep_ethdev.c  |   7 ++
 drivers/net/octeon_ep/otx_ep_rxtx.h|   6 +
 7 files changed, 323 insertions(+), 158 deletions(-)
 create mode 100644 drivers/net/octeon_ep/cnxk_ep_rx.h
 create mode 100644 drivers/net/octeon_ep/cnxk_ep_rx_sse.c

diff --git a/doc/guides/rel_notes/release_24_03.rst 
b/doc/guides/rel_notes/release_24_03.rst
index 2265814c55..2767d2a91b 100644
--- a/doc/guides/rel_notes/release_24_03.rst
+++ b/doc/guides/rel_notes/release_24_03.rst
@@ -59,6 +59,7 @@ New Features
 
   * Optimize mbuf rearm sequence.
   * Updated Tx queue mbuf free thresholds from 128 to 256 for better 
performance.
+  * Added optimized SSE Rx routines.
 
 
 Removed Items
diff --git a/drivers/net/octeon_ep/cnxk_ep_rx.c 
b/drivers/net/octeon_ep/cnxk_ep_rx.c
index 75bb7225d2..f3e4fb27d1 100644
--- a/drivers/net/octeon_ep/cnxk_ep_rx.c
+++ b/drivers/net/octeon_ep/cnxk_ep_rx.c
@@ -2,164 +2,7 @@
  * Copyright(C) 2023 Marvell.
  */
 
-#include "otx_ep_common.h"
-#include "otx2_ep_vf.h"
-#include "otx_ep_rxtx.h"
-
-static inline int
-cnxk_ep_rx_refill_mbuf(struct otx_ep_droq *droq, uint32_t count)
-{
-   struct otx_ep_droq_desc *desc_ring = droq->desc_ring;
-   struct rte_mbuf **recv_buf_list = droq->recv_buf_list;
-   uint32_t refill_idx = droq->refill_idx;
-   struct rte_mbuf *buf;
-   uint32_t i;
-   int rc;
-
-   rc = rte_pktmbuf_alloc_bulk(droq->mpool, &recv_buf_list[refill_idx], 
count);
-   if (unlikely(rc)) {
-   droq->stats.rx_alloc_failure++;
-   return rc;
-   }
-
-   for (i = 0; i < count; i++) {
-   buf = recv_buf_list[refill_idx];
-   desc_ring[refill_idx].buffer_ptr = 
rte_mbuf_data_iova_default(buf);
-   refill_idx++;
-   }
-
-   droq->refill_idx = otx_ep_incr_index(droq->refill_idx, count, 
droq->nb_desc);
-   droq->refill_count -= count;
-
-   return 0;
-}
-
-static inline void
-cnxk_ep_rx_refill(struct otx_ep_droq *droq)
-{
-   uint32_t desc_refilled = 0, count;
-   uint32_t nb_desc = droq->nb_desc;
-   uint32_t refill_idx = droq->refill_idx;
-   int rc;
-
-   if (unlikely(droq->read_idx == refill_idx))
-   return;
-
-   if (refill_idx < droq->read_idx) {
-   count = droq->read_idx - refill_idx;
-   rc = cnxk_ep_rx_refill_mbuf(droq, count);
-   if (unlikely(rc)) {
-   droq->stats.rx_alloc_failure++;
-   return;
-   }
-   desc_refilled = count;
-   } else {
-   count = nb_desc - refill_idx;
-   rc = cnxk_ep_rx_refill_mbuf(droq, count);
-   if (unlikely(rc)) {
-   droq->stats.rx_alloc_failure++;
-   return;
-   }
-
-   desc_refilled = count;
-   count = droq->read_idx;
-   rc = cnxk_ep_rx_refill_mbuf(droq, count);
-   if (unlikely(rc)) {
-   droq->stats.rx_alloc_failure++;
-   return;
-   }
-   desc_refilled += count;
-   }
-
-   /* Flush the droq descriptor data to memory to be sure
-* that when we update the credits the data in memory is
-* accurate.
-*/
-   rte_io_wmb();
-   rte_write32(desc_refilled, droq->pkts_credit_reg);
-}
-
-static inline uint32_t
-cnxk_ep_check_rx_pkts(struct otx_ep_droq *droq)
-{
-   uint32_t new_pkts;
-   uint32_t val;
-
-   /* Batch subtractions from the HW counter to reduce PCIe traffic
-* This adds an extra local variable, but almost halves the
-* number of PCIe writes.
-*/
-   val = __atomic_load_n(droq->pkts_sent_ism, __ATOMIC_RELAXED);
-   new_pkts = val - droq->pkts_sent_ism_prev;
-   droq->pkts_sent_ism_prev = val;
-
-   if (val > RTE_BIT32(31)) {
-   /* Only subtract the packet count in the HW counter
-* when count above halfway to saturation.
-*/
-   rte_write64((uint64_t)val, droq->pkts_sent_reg);
-   rte_mb();
-
-   rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
-   while (__atomic_load_n(droq->pkts_sent_ism, __ATOMIC_RELAXED) 
>= val) {
-   rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
-   rte_mb();
-   }
-
-   droq->pkts_sent_ism_prev = 0;
-   }
-   rte_write64(OTX2_SDP_REQUEST_ISM, droq->pkts_sent_reg);
-   droq->pkt

[PATCH v5 3/3] net/octeon_ep: use AVX2 instructions for Rx

2023-12-11 Thread pbhagavatula
From: Pavan Nikhilesh 

Optimize Rx routine to use AVX2 instructions when underlying
architecture supports it.

Signed-off-by: Pavan Nikhilesh 
---
 doc/guides/rel_notes/release_24_03.rst |   1 +
 drivers/net/octeon_ep/cnxk_ep_rx_avx.c | 123 +
 drivers/net/octeon_ep/meson.build  |  12 +++
 drivers/net/octeon_ep/otx_ep_ethdev.c  |  10 ++
 drivers/net/octeon_ep/otx_ep_rxtx.h|   6 ++
 5 files changed, 152 insertions(+)
 create mode 100644 drivers/net/octeon_ep/cnxk_ep_rx_avx.c

diff --git a/doc/guides/rel_notes/release_24_03.rst 
b/doc/guides/rel_notes/release_24_03.rst
index 2767d2a91b..b392a4f30a 100644
--- a/doc/guides/rel_notes/release_24_03.rst
+++ b/doc/guides/rel_notes/release_24_03.rst
@@ -60,6 +60,7 @@ New Features
   * Optimize mbuf rearm sequence.
   * Updated Tx queue mbuf free thresholds from 128 to 256 for better 
performance.
   * Added optimized SSE Rx routines.
+  * Added optimized AVX2 Rx routines.
 
 
 Removed Items
diff --git a/drivers/net/octeon_ep/cnxk_ep_rx_avx.c 
b/drivers/net/octeon_ep/cnxk_ep_rx_avx.c
new file mode 100644
index 00..ae4615e6da
--- /dev/null
+++ b/drivers/net/octeon_ep/cnxk_ep_rx_avx.c
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2023 Marvell.
+ */
+
+#include "cnxk_ep_rx.h"
+
+static __rte_always_inline void
+cnxk_ep_process_pkts_vec_avx(struct rte_mbuf **rx_pkts, struct otx_ep_droq 
*droq, uint16_t new_pkts)
+{
+   struct rte_mbuf **recv_buf_list = droq->recv_buf_list;
+   uint32_t bytes_rsvd = 0, read_idx = droq->read_idx;
+   const uint64_t rearm_data = droq->rearm_data;
+   struct rte_mbuf *m[CNXK_EP_OQ_DESC_PER_LOOP_AVX];
+   uint32_t pidx[CNXK_EP_OQ_DESC_PER_LOOP_AVX];
+   uint32_t idx[CNXK_EP_OQ_DESC_PER_LOOP_AVX];
+   uint16_t nb_desc = droq->nb_desc;
+   uint16_t pkts = 0;
+   uint8_t i;
+
+   idx[0] = read_idx;
+   while (pkts < new_pkts) {
+   __m256i data[CNXK_EP_OQ_DESC_PER_LOOP_AVX];
+   /* mask to shuffle from desc. to mbuf (2 descriptors)*/
+   const __m256i mask =
+   _mm256_set_epi8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 20, 
21, 0xFF, 0xFF, 20,
+   21, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, 0xFF, 0xFF,
+   0xFF, 0xFF, 0xFF, 7, 6, 5, 4, 3, 2, 1, 
0);
+
+   /* Load indexes. */
+   for (i = 1; i < CNXK_EP_OQ_DESC_PER_LOOP_AVX; i++)
+   idx[i] = otx_ep_incr_index(idx[i - 1], 1, nb_desc);
+
+   /* Prefetch next indexes. */
+   if (new_pkts - pkts > 8) {
+   pidx[0] = otx_ep_incr_index(idx[i - 1], 1, nb_desc);
+   for (i = 1; i < CNXK_EP_OQ_DESC_PER_LOOP_AVX; i++)
+   pidx[i] = otx_ep_incr_index(pidx[i - 1], 1, 
nb_desc);
+
+   for (i = 0; i < CNXK_EP_OQ_DESC_PER_LOOP_AVX; i++) {
+   rte_prefetch0(recv_buf_list[pidx[i]]);
+   
rte_prefetch0(rte_pktmbuf_mtod(recv_buf_list[pidx[i]], void *));
+   }
+   }
+
+   /* Load mbuf array. */
+   for (i = 0; i < CNXK_EP_OQ_DESC_PER_LOOP_AVX; i++)
+   m[i] = recv_buf_list[idx[i]];
+
+   /* Load rearm data and packet length for shuffle. */
+   for (i = 0; i < CNXK_EP_OQ_DESC_PER_LOOP_AVX; i++)
+   data[i] = _mm256_set_epi64x(0,
+   rte_pktmbuf_mtod(m[i], struct otx_ep_droq_info 
*)->length >> 16,
+   0, rearm_data);
+
+   /* Shuffle data to its place and sum the packet length. */
+   for (i = 0; i < CNXK_EP_OQ_DESC_PER_LOOP_AVX; i++) {
+   data[i] = _mm256_shuffle_epi8(data[i], mask);
+   bytes_rsvd += _mm256_extract_epi16(data[i], 10);
+   }
+
+   /* Store the 256bit data to the mbuf. */
+   for (i = 0; i < CNXK_EP_OQ_DESC_PER_LOOP_AVX; i++)
+   _mm256_storeu_si256((__m256i *)&m[i]->rearm_data, 
data[i]);
+
+   for (i = 0; i < CNXK_EP_OQ_DESC_PER_LOOP_AVX; i++)
+   rx_pkts[pkts++] = m[i];
+   idx[0] = otx_ep_incr_index(idx[i - 1], 1, nb_desc);
+   }
+   droq->read_idx = idx[0];
+
+   droq->refill_count += new_pkts;
+   droq->pkts_pending -= new_pkts;
+   /* Stats */
+   droq->stats.pkts_received += new_pkts;
+   droq->stats.bytes_received += bytes_rsvd;
+}
+
+uint16_t __rte_noinline __rte_hot
+cnxk_ep_recv_pkts_avx(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t 
nb_pkts)
+{
+   struct otx_ep_droq *droq = (struct otx_ep_droq *)rx_queue;
+   uint16_t new_pkts, vpkts;
+
+   new_pkts = cnxk_ep_rx_pkts_to_process(droq, nb_pkts);
+   vpkts = RTE_ALIGN_FLOOR(new_pkts, CNXK_EP_OQ_DESC_PER_LOOP_AV

Re: [PATCH] event/cnxk: fix dequeue timeout configuration

2023-12-11 Thread Jerin Jacob
On Thu, Dec 7, 2023 at 1:43 PM  wrote:
>
> From: Pavan Nikhilesh 
>
> Allow dequeue timeout to be configured as zero, when
> RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT is disabled.
>
> Fixes: 5512c7de85cd ("event/cnxk: add common configuration validation")
> Cc: sta...@dpdk.org
>
> Signed-off-by: Pavan Nikhilesh 

Applied to dpdk-next-eventdev/for-main. Thanks


Re: [PATCH v2 00/11] Add basic flow support for corenic firmware

2023-12-11 Thread Ferruh Yigit
On 12/11/2023 1:42 AM, Chaoyong He wrote:
>> On 12/5/2023 2:54 AM, Chaoyong He wrote:
>>> Add the very basic rte_flow support for corenic firmware.
>>>
>>> ---
>>> v2:
>>> * Update the 'nfp.ini' document.
>>> * Rebase to the latest main branch.
>>> ---
>>>
>>> Chaoyong He (11):
>>>   net/nfp: move some source files
>>>   net/nfp: add the structures and functions for flow offload
>>>   net/nfp: add the control message channel
>>>   net/nfp: support flow API for CoreNIC firmware
>>>   net/nfp: support Ethernet flow item
>>>   net/nfp: support drop flow action
>>>   net/nfp: support IPv4 flow item
>>>   net/nfp: support IPv6 flow item
>>>   net/nfp: support TCP/UDP/SCTP flow items
>>>   net/nfp: support MARK flow action
>>>   net/nfp: support QUEUE flow action
>>>
>>
>> Series applied to dpdk-next-net/main, thanks.
>>
>>
>> Flower firmware already supports flow API, right?
> Correct.
> 
>> What is the difference of these two firmware? Are they both actively used?
> The flower firmware aims to support as many flow APIs (and match 
> patterns/actions) as possible, 
> and the corenic firmware will only support very limited ones (just enough) for 
> some strict use cases, like flow steering, 
> bond offload ...
>

I see, thanks for the information.


[v3] net/af_xdp: enable uds-path instead of use_cni

2023-12-11 Thread Maryam Tahhan
With the original 'use_cni' implementation, (using a
hardcoded socket rather than a configurable one),
if a single pod is requesting multiple net devices
and these devices are from different pools, then
the container attempts to mount all the netdev UDSes
in the pod as /tmp/afxdp.sock. Which means that at best
only 1 netdev will handshake correctly with the AF_XDP
DP. This patch addresses this by making the socket
parameter configurable using a new vdev param called
'uds_path' and removing the previous 'use_cni' param.
Tested with the AF_XDP DP CNI PR 81, single and multiple
interfaces.

v3:
* Remove `use_cni` vdev argument as it's no longer needed.
* Update incorrect CNI references for the AF_XDP DP in the
  documentation.
* Update the documentation to run a simple example with the
  AF_XDP DP plugin in K8s.

v2:
* Rename sock_path to uds_path.
* Update documentation to reflect when CAP_BPF is needed.
* Fix testpmd arguments in the provided example for Pods.
* Use AF_XDP API to update the xskmap entry.

Signed-off-by: Maryam Tahhan 
---
 doc/guides/howto/af_xdp_cni.rst | 334 +++-
 drivers/net/af_xdp/rte_eth_af_xdp.c |  76 +++
 2 files changed, 216 insertions(+), 194 deletions(-)

diff --git a/doc/guides/howto/af_xdp_cni.rst b/doc/guides/howto/af_xdp_cni.rst
index a1a6d5b99c..b71fef61c7 100644
--- a/doc/guides/howto/af_xdp_cni.rst
+++ b/doc/guides/howto/af_xdp_cni.rst
@@ -1,71 +1,65 @@
 .. SPDX-License-Identifier: BSD-3-Clause
Copyright(c) 2023 Intel Corporation.
 
-Using a CNI with the AF_XDP driver
-==
+Using the AF_XDP Device Plugin with the AF_XDP driver
+==
 
 Introduction
 
 
-CNI, the Container Network Interface, is a technology for configuring
-container network interfaces
-and which can be used to setup Kubernetes networking.
+The `AF_XDP Device Plugin for Kubernetes`_ is a project that provisions
+and advertises interfaces (that can be used with AF_XDP) to Kubernetes.
+The project also includes a `CNI`_.
+
 AF_XDP is a Linux socket Address Family that enables an XDP program
 to redirect packets to a memory buffer in userspace.
 
-This document explains how to enable the `AF_XDP Plugin for Kubernetes`_ within
-a DPDK application using the :doc:`../nics/af_xdp` to connect and use these 
technologies.
-
-.. _AF_XDP Plugin for Kubernetes: 
https://github.com/intel/afxdp-plugins-for-kubernetes
+This document explains how to use the `AF_XDP Device Plugin for Kubernetes`_ 
with
+a DPDK :doc:`../nics/af_xdp` based application running in a Pod.
 
+.. _AF_XDP Device Plugin for Kubernetes: 
https://github.com/intel/afxdp-plugins-for-kubernetes
+.. _CNI: https://github.com/containernetworking/cni
 
 Background
 --
 
-The standard :doc:`../nics/af_xdp` initialization process involves loading an 
eBPF program
-onto the kernel netdev to be used by the PMD.
-This operation requires root or escalated Linux privileges
-and thus prevents the PMD from working in an unprivileged container.
-The AF_XDP CNI plugin handles this situation
-by providing a device plugin that performs the program loading.
-
-At a technical level the CNI opens a Unix Domain Socket and listens for a 
client
-to make requests over that socket.
-A DPDK application acting as a client connects and initiates a configuration 
"handshake".
-The client then receives a file descriptor which points to the XSKMAP
-associated with the loaded eBPF program.
-The XSKMAP is a BPF map of AF_XDP sockets (XSK).
-The client can then proceed with creating an AF_XDP socket
-and inserting that socket into the XSKMAP pointed to by the descriptor.
-
-The EAL vdev argument ``use_cni`` is used to indicate that the user wishes
-to run the PMD in unprivileged mode and to receive the XSKMAP file descriptor
-from the CNI.
-When this flag is set,
-the ``XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD`` libbpf flag
-should be used when creating the socket
-to instruct libbpf not to load the default libbpf program on the netdev.
-Instead the loading is handled by the CNI.
+The standard :doc:`../nics/af_xdp` initialization process involves
+loading an eBPF program onto the kernel netdev to be used by the PMD.
+This operation requires root or escalated Linux privileges and prevents
+the PMD from working in an unprivileged container. The AF_XDP Device plugin
+addresses this situation by providing an entity that manages eBPF program
+lifecycle for Pod interfaces that wish to use AF_XDP, this in turn allows
+the pod to be used without privilege escalation.
+
+In order for the pod to run without privilege escalation, the AF_XDP DP
+creates a Unix Domain Socket (UDS) and listens for Pods to make requests
+for XSKMAP(s) File Descriptors (FDs) for interfaces in their network namespace.
+In other words, the DPDK application running in the Pod connects to this UDS 
and
+initiates a "handshake" to retrieve the XSKMAP(s) FD(s). Upon a successful 
"handshake",
+th

RE: 20.11.10 patches review and test

2023-12-11 Thread Ali Alnubani
> -Original Message-
> From: luca.bocca...@gmail.com 
> Sent: Friday, December 1, 2023 1:51 PM
> To: sta...@dpdk.org
> Cc: dev@dpdk.org; Abhishek Marathe ;
> Ali Alnubani ; benjamin.wal...@intel.com; David
> Christensen ; Hemant Agrawal
> ; Ian Stokes ; Jerin Jacob
> ; John McNamara ; Ju-
> Hyoung Lee ; Kevin Traynor ;
> Luca Boccassi ; Pei Zhang ;
> qian.q...@intel.com; Raslan Darawsheh ; NBU-
> Contact-Thomas Monjalon (EXTERNAL) ; Yanghang
> Liu ; yuan.p...@intel.com; zhaoyan.c...@intel.com
> Subject: 20.11.10 patches review and test
> 
> Hi all,
> 
> Here is a list of patches targeted for stable release 20.11.10.
> 
> The planned date for the final release is December 12th.
> 
> Please help with testing and validation of your use cases and report
> any issues/results with reply-all to this mail. For the final release
> the fixes and reported validations will be added to the release notes.
> 

Hello,

We ran the following functional tests with Nvidia hardware on v20.11.10-rc1:
- Basic functionality:
  Send and receive multiple types of traffic.
- testpmd xstats counter test.
- testpmd timestamp test.
- Changing/checking link status through testpmd.
- rte_flow tests 
(https://doc.dpdk.org/guides/nics/mlx5.html#supported-hardware-offloads)
- RSS tests.
- VLAN filtering, stripping, and insertion tests.
- Checksum and TSO tests.
- ptype tests.
- link_status_interrupt example application tests.
- l3fwd-power example application tests.
- Multi-process example applications tests.
- Hardware LRO tests.

Functional tests ran on:
- NIC: ConnectX-6 Dx / OS: Ubuntu 20.04 / Driver: MLNX_OFED_LINUX-23.10-0.5.5.0 
/ Firmware: 22.39.1002
- NIC: ConnectX-7 / OS: Ubuntu 20.04 / Driver: MLNX_OFED_LINUX-23.10-0.5.5.0 / 
Firmware: 28.39.1002
- DPU: BlueField-2 / DOCA SW version: 2.2.0 / Firmware: 24.38.1002

Additionally, we ran build tests with multiple configurations on the following 
OS/driver combinations:
- Ubuntu 22.04.3 with MLNX_OFED_LINUX-23.10-0.5.5.0.
- Ubuntu 20.04.6 with MLNX_OFED_LINUX-23.07-0.5.1.2.
- Ubuntu 20.04.6 with rdma-core master (0cf342c).
- Ubuntu 20.04.6 with rdma-core v28.0.
- Fedora 38 with rdma-core v44.0.
- Fedora 40 (Rawhide) with rdma-core v48.0.
- OpenSUSE Leap 15.5 with rdma-core v42.0.
- Windows Server 2019 with Clang 16.0.6.

We don't see new issues caused by the changes in this release.

Thanks,
Ali


Re: 20.11.10 patches review and test

2023-12-11 Thread Luca Boccassi
On Mon, 11 Dec 2023 at 15:23, Ali Alnubani  wrote:
>
> > -Original Message-
> > From: luca.bocca...@gmail.com 
> > Sent: Friday, December 1, 2023 1:51 PM
> > To: sta...@dpdk.org
> > Cc: dev@dpdk.org; Abhishek Marathe ;
> > Ali Alnubani ; benjamin.wal...@intel.com; David
> > Christensen ; Hemant Agrawal
> > ; Ian Stokes ; Jerin Jacob
> > ; John McNamara ; Ju-
> > Hyoung Lee ; Kevin Traynor ;
> > Luca Boccassi ; Pei Zhang ;
> > qian.q...@intel.com; Raslan Darawsheh ; NBU-
> > Contact-Thomas Monjalon (EXTERNAL) ; Yanghang
> > Liu ; yuan.p...@intel.com; zhaoyan.c...@intel.com
> > Subject: 20.11.10 patches review and test
> >
> > Hi all,
> >
> > Here is a list of patches targeted for stable release 20.11.10.
> >
> > The planned date for the final release is December 12th.
> >
> > Please help with testing and validation of your use cases and report
> > any issues/results with reply-all to this mail. For the final release
> > the fixes and reported validations will be added to the release notes.
> >
>
> Hello,
>
> We ran the following functional tests with Nvidia hardware on v20.11.10-rc1:
> - Basic functionality:
>   Send and receive multiple types of traffic.
> - testpmd xstats counter test.
> - testpmd timestamp test.
> - Changing/checking link status through testpmd.
> - rte_flow tests 
> (https://doc.dpdk.org/guides/nics/mlx5.html#supported-hardware-offloads)
> - RSS tests.
> - VLAN filtering, stripping, and insertion tests.
> - Checksum and TSO tests.
> - ptype tests.
> - link_status_interrupt example application tests.
> - l3fwd-power example application tests.
> - Multi-process example applications tests.
> - Hardware LRO tests.
>
> Functional tests ran on:
> - NIC: ConnectX-6 Dx / OS: Ubuntu 20.04 / Driver: 
> MLNX_OFED_LINUX-23.10-0.5.5.0 / Firmware: 22.39.1002
> - NIC: ConnectX-7 / OS: Ubuntu 20.04 / Driver: MLNX_OFED_LINUX-23.10-0.5.5.0 
> / Firmware: 28.39.1002
> - DPU: BlueField-2 / DOCA SW version: 2.2.0 / Firmware: 24.38.1002
>
> Additionally, we ran build tests with multiple configurations on the 
> following OS/driver combinations:
> - Ubuntu 22.04.3 with MLNX_OFED_LINUX-23.10-0.5.5.0.
> - Ubuntu 20.04.6 with MLNX_OFED_LINUX-23.07-0.5.1.2.
> - Ubuntu 20.04.6 with rdma-core master (0cf342c).
> - Ubuntu 20.04.6 with rdma-core v28.0.
> - Fedora 38 with rdma-core v44.0.
> - Fedora 40 (Rawhide) with rdma-core v48.0.
> - OpenSUSE Leap 15.5 with rdma-core v42.0.
> - Windows Server 2019 with Clang 16.0.6.
>
> We don't see new issues caused by the changes in this release.

Thank you!


RE: [PATCH v3 8/8] examples/l3fwd-power: update to call arg parser API

2023-12-11 Thread Tummala, Sivaprasad
[AMD Official Use Only - General]

Hi Euan,

> -Original Message-
> From: Euan Bourke 
> Sent: Thursday, December 7, 2023 9:48 PM
> To: dev@dpdk.org
> Cc: Euan Bourke ; Anatoly Burakov
> ; David Hunt ; Tummala,
> Sivaprasad 
> Subject: [PATCH v3 8/8] examples/l3fwd-power: update to call arg parser API
>
> Caution: This message originated from an External Source. Use proper caution
> when opening attachments, clicking links, or responding.
>
>
> Update to the l3fwd-power example application to call the arg parser library 
> for its
> 'combined core string parser' instead of implementing its own corelist 
> parser. The
> default_type passed into the function call is a corelist.
>
> Signed-off-by: Euan Bourke 
> ---
>  examples/l3fwd-power/perf_core.c | 51 +---
>  1 file changed, 8 insertions(+), 43 deletions(-)
>
> diff --git a/examples/l3fwd-power/perf_core.c 
> b/examples/l3fwd-power/perf_core.c
> index 41ef6d0c9a..f8511e30b3 100644
> --- a/examples/l3fwd-power/perf_core.c
> +++ b/examples/l3fwd-power/perf_core.c
> @@ -12,6 +12,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>
>  #include "perf_core.h"
>  #include "main.h"
> @@ -177,56 +178,20 @@ parse_perf_config(const char *q_arg)  int
> parse_perf_core_list(const char *corelist)  {
> -   int i, idx = 0;
> -   unsigned int count = 0;
> -   char *end = NULL;
> -   int min, max;
> +   int count;
> +   uint16_t cores[RTE_MAX_LCORE];
>
> if (corelist == NULL) {
> printf("invalid core list\n");
> return -1;
> }
>
> +   count = rte_arg_parse_core_string(corelist, cores,
> + RTE_DIM(cores), 1);
Can you replace the magic number with "RTE_ARG_PARSE_TYPE_CORELIST" as the default 
parse type?

>
> -   /* Remove all blank characters ahead and after */
> -   while (isblank(*corelist))
> -   corelist++;
> -   i = strlen(corelist);
> -   while ((i > 0) && isblank(corelist[i - 1]))
> -   i--;
> +   for (int i = 0; i < count; i++)
> +   hp_lcores[i] = cores[i];
>
> -   /* Get list of cores */
> -   min = RTE_MAX_LCORE;
> -   do {
> -   while (isblank(*corelist))
> -   corelist++;
> -   if (*corelist == '\0')
> -   return -1;
> -   errno = 0;
> -   idx = strtoul(corelist, &end, 10);
> -   if (errno || end == NULL)
> -   return -1;
> -   while (isblank(*end))
> -   end++;
> -   if (*end == '-') {
> -   min = idx;
> -   } else if ((*end == ',') || (*end == '\0')) {
> -   max = idx;
> -   if (min == RTE_MAX_LCORE)
> -   min = idx;
> -   for (idx = min; idx <= max; idx++) {
> -   hp_lcores[count] = idx;
> -   count++;
> -   }
> -   min = RTE_MAX_LCORE;
> -   } else {
> -   printf("invalid core list\n");
> -   return -1;
> -   }
> -   corelist = end + 1;
> -   } while (*end != '\0');
> -
> -   if (count == 0) {
> +   if (count == 0 || count == -1) {
> printf("invalid core list\n");
> return -1;
> }
> @@ -234,7 +199,7 @@ parse_perf_core_list(const char *corelist)
> nb_hp_lcores = count;
>
> printf("Configured %d high performance cores\n", nb_hp_lcores);
> -   for (i = 0; i < nb_hp_lcores; i++)
> +   for (uint16_t i = 0; i < nb_hp_lcores; i++)
> printf("\tHigh performance core %d %d\n",
> i, hp_lcores[i]);
>
> --
> 2.34.1



[PATCH] doc: remove cmdline_poll deprecation notice

2023-12-11 Thread Stephen Hemminger
cmdline_poll has been removed by:
commit f44f2edd198a ("cmdline: remove poll function")
but deprecation notice was left behind.

Signed-off-by: Stephen Hemminger 
---
 doc/guides/rel_notes/deprecation.rst | 4 
 1 file changed, 4 deletions(-)

diff --git a/doc/guides/rel_notes/deprecation.rst 
b/doc/guides/rel_notes/deprecation.rst
index 81b93515cbd9..10630ba25564 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -27,10 +27,6 @@ Deprecation Notices
 * kvargs: The function ``rte_kvargs_process`` will get a new parameter
   for returning key match count. It will ease handling of no-match case.
 
-* cmdline: The function ``cmdline_poll`` does not work correctly on either
-  Linux or Windows and is unused by any part of DPDK.
-  This function is now deprecated and will be removed in DPDK 23.11.
-
 * telemetry: The functions ``rte_tel_data_add_array_u64`` and 
``rte_tel_data_add_dict_u64``,
   used by telemetry callbacks for adding unsigned integer values to be 
returned to the user,
   are renamed to ``rte_tel_data_add_array_uint`` and 
``rte_tel_data_add_dict_uint`` respectively.
-- 
2.42.0



Re: [PATCH] doc: remove cmdline_poll deprecation notice

2023-12-11 Thread Bruce Richardson
On Mon, Dec 11, 2023 at 08:52:37AM -0800, Stephen Hemminger wrote:
> cmdline_poll has been removed by:
> commit f44f2edd198a ("cmdline: remove poll function")
> but deprecation notice was left behind.
> 

Suggest adding that commit in a fixes line and adding stable on Cc so this
can be picked up for backport.

> Signed-off-by: Stephen Hemminger 

Acked-by: Bruce Richardson 

> ---
>  doc/guides/rel_notes/deprecation.rst | 4 
>  1 file changed, 4 deletions(-)
> 
> diff --git a/doc/guides/rel_notes/deprecation.rst 
> b/doc/guides/rel_notes/deprecation.rst
> index 81b93515cbd9..10630ba25564 100644
> --- a/doc/guides/rel_notes/deprecation.rst
> +++ b/doc/guides/rel_notes/deprecation.rst
> @@ -27,10 +27,6 @@ Deprecation Notices
>  * kvargs: The function ``rte_kvargs_process`` will get a new parameter
>for returning key match count. It will ease handling of no-match case.
>  
> -* cmdline: The function ``cmdline_poll`` does not work correctly on either
> -  Linux or Windows and is unused by any part of DPDK.
> -  This function is now deprecated and will be removed in DPDK 23.11.
> -
>  * telemetry: The functions ``rte_tel_data_add_array_u64`` and 
> ``rte_tel_data_add_dict_u64``,
>used by telemetry callbacks for adding unsigned integer values to be 
> returned to the user,
>are renamed to ``rte_tel_data_add_array_uint`` and 
> ``rte_tel_data_add_dict_uint`` respectively.
> -- 
> 2.42.0
> 


[PATCH v3 00/14] support new 5760X P7 devices

2023-12-11 Thread Ajit Khaparde
While some of the patches refactor and improve existing code,
this series adds support for the new 5760X P7 device family.
Follow-on patches will incrementally add more functionality.

v1->v2:
- Fixed unused variable error
- Fixed some spellings
- Code refactoring and fixes in backing store v2

v2->v3:
- Addressed review comments
- Fixed unused arg error

Ajit Khaparde (12):
  net/bnxt: refactor epoch setting
  net/bnxt: update HWRM API
  net/bnxt: use the correct COS queue for Tx
  net/bnxt: refactor mem zone allocation
  net/bnxt: add support for p7 device family
  net/bnxt: refactor code to support P7 devices
  net/bnxt: fix array overflow
  net/bnxt: add support for backing store v2
  net/bnxt: modify sending new HWRM commands to firmware
  net/bnxt: retry HWRM ver get if the command fails
  net/bnxt: cap ring resources for P7 devices
  net/bnxt: add support for v3 Rx completion

Kalesh AP (1):
  net/bnxt: log a message when multicast promisc mode changes

Kishore Padmanabha (1):
  net/bnxt: refactor the ulp initialization

 drivers/net/bnxt/bnxt.h|   97 +-
 drivers/net/bnxt/bnxt_cpr.h|5 +-
 drivers/net/bnxt/bnxt_ethdev.c |  321 -
 drivers/net/bnxt/bnxt_flow.c   |2 +-
 drivers/net/bnxt/bnxt_hwrm.c   |  414 ++-
 drivers/net/bnxt/bnxt_hwrm.h   |   15 +
 drivers/net/bnxt/bnxt_ring.c   |   15 +-
 drivers/net/bnxt/bnxt_rxq.c|2 +-
 drivers/net/bnxt/bnxt_rxr.c|   93 +-
 drivers/net/bnxt/bnxt_rxr.h|   92 ++
 drivers/net/bnxt/bnxt_util.c   |   10 +
 drivers/net/bnxt/bnxt_util.h   |1 +
 drivers/net/bnxt/bnxt_vnic.c   |   58 +-
 drivers/net/bnxt/bnxt_vnic.h   |1 -
 drivers/net/bnxt/hsi_struct_def_dpdk.h | 1531 ++--
 15 files changed, 2407 insertions(+), 250 deletions(-)

-- 
2.39.2 (Apple Git-143)



smime.p7s
Description: S/MIME Cryptographic Signature


[PATCH v3 01/14] net/bnxt: refactor epoch setting

2023-12-11 Thread Ajit Khaparde
Fix epoch bit setting when we ring the doorbell.
The epoch bit needs to toggle alternately between 0 and 1 every time the
ring indices wrap.
Currently its value is anything but an alternating 0 and 1.

Remove unnecessary field db_epoch_shift from
bnxt_db_info structure.

Signed-off-by: Ajit Khaparde 
Reviewed-by: Damodharam Ammepalli 
---
 drivers/net/bnxt/bnxt_cpr.h  | 5 ++---
 drivers/net/bnxt/bnxt_ring.c | 9 ++---
 2 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/drivers/net/bnxt/bnxt_cpr.h b/drivers/net/bnxt/bnxt_cpr.h
index 2de154322d..26e81a6a7e 100644
--- a/drivers/net/bnxt/bnxt_cpr.h
+++ b/drivers/net/bnxt/bnxt_cpr.h
@@ -53,11 +53,10 @@ struct bnxt_db_info {
booldb_64;
uint32_tdb_ring_mask;
uint32_tdb_epoch_mask;
-   uint32_tdb_epoch_shift;
 };
 
-#define DB_EPOCH(db, idx)  (((idx) & (db)->db_epoch_mask) <<   \
-((db)->db_epoch_shift))
+#define DB_EPOCH(db, idx)  (!!((idx) & (db)->db_epoch_mask) << \
+DBR_EPOCH_SFT)
 #define DB_RING_IDX(db, idx)   (((idx) & (db)->db_ring_mask) | \
 DB_EPOCH(db, idx))
 
diff --git a/drivers/net/bnxt/bnxt_ring.c b/drivers/net/bnxt/bnxt_ring.c
index 34b2510d54..6dacb1b37f 100644
--- a/drivers/net/bnxt/bnxt_ring.c
+++ b/drivers/net/bnxt/bnxt_ring.c
@@ -371,9 +371,10 @@ static void bnxt_set_db(struct bnxt *bp,
db->db_key64 = DBR_PATH_L2;
break;
}
-   if (BNXT_CHIP_SR2(bp)) {
+   if (BNXT_CHIP_P7(bp)) {
db->db_key64 |= DBR_VALID;
db_offset = bp->legacy_db_size;
+   db->db_epoch_mask = ring_mask + 1;
} else if (BNXT_VF(bp)) {
db_offset = DB_VF_OFFSET;
}
@@ -397,12 +398,6 @@ static void bnxt_set_db(struct bnxt *bp,
db->db_64 = false;
}
db->db_ring_mask = ring_mask;
-
-   if (BNXT_CHIP_SR2(bp)) {
-   db->db_epoch_mask = db->db_ring_mask + 1;
-   db->db_epoch_shift = DBR_EPOCH_SFT -
-   rte_log2_u32(db->db_epoch_mask);
-   }
 }
 
 static int bnxt_alloc_cmpl_ring(struct bnxt *bp, int queue_index,
-- 
2.39.2 (Apple Git-143)



smime.p7s
Description: S/MIME Cryptographic Signature


[PATCH v3 02/14] net/bnxt: update HWRM API

2023-12-11 Thread Ajit Khaparde
Update HWRM API to version 1.10.2.158

Signed-off-by: Ajit Khaparde 
---
 drivers/net/bnxt/bnxt_hwrm.c   |3 -
 drivers/net/bnxt/hsi_struct_def_dpdk.h | 1531 ++--
 2 files changed, 1429 insertions(+), 105 deletions(-)

diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
index 06f196760f..0a31b984e6 100644
--- a/drivers/net/bnxt/bnxt_hwrm.c
+++ b/drivers/net/bnxt/bnxt_hwrm.c
@@ -5175,9 +5175,6 @@ int bnxt_hwrm_set_ntuple_filter(struct bnxt *bp,
if (enables &
HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_DST_PORT_MASK)
req.dst_port_mask = rte_cpu_to_le_16(filter->dst_port_mask);
-   if (enables &
-   HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_MIRROR_VNIC_ID)
-   req.mirror_vnic_id = filter->mirror_vnic_id;
 
req.enables = rte_cpu_to_le_32(enables);
 
diff --git a/drivers/net/bnxt/hsi_struct_def_dpdk.h 
b/drivers/net/bnxt/hsi_struct_def_dpdk.h
index 9afdd056ce..65f3f0576b 100644
--- a/drivers/net/bnxt/hsi_struct_def_dpdk.h
+++ b/drivers/net/bnxt/hsi_struct_def_dpdk.h
@@ -1154,8 +1154,8 @@ struct hwrm_err_output {
 #define HWRM_VERSION_MINOR 10
 #define HWRM_VERSION_UPDATE 2
 /* non-zero means beta version */
-#define HWRM_VERSION_RSVD 138
-#define HWRM_VERSION_STR "1.10.2.138"
+#define HWRM_VERSION_RSVD 158
+#define HWRM_VERSION_STR "1.10.2.158"
 
 /
  * hwrm_ver_get *
@@ -6329,19 +6329,14 @@ struct rx_pkt_v3_cmpl_hi {
#define RX_PKT_V3_CMPL_HI_ERRORS_T_PKT_ERROR_T_L3_BAD_TTL \
(UINT32_C(0x5) << 9)
/*
-* Indicates that the IP checksum failed its check in the tunnel
+* Indicates that the physical packet is shorter than that claimed
+* by the tunnel header length. Valid for GTPv1-U packets.
 * header.
 */
-   #define RX_PKT_V3_CMPL_HI_ERRORS_T_PKT_ERROR_T_IP_CS_ERROR \
+   #define RX_PKT_V3_CMPL_HI_ERRORS_T_PKT_ERROR_T_TOTAL_ERROR \
(UINT32_C(0x6) << 9)
-   /*
-* Indicates that the L4 checksum failed its check in the tunnel
-* header.
-*/
-   #define RX_PKT_V3_CMPL_HI_ERRORS_T_PKT_ERROR_T_L4_CS_ERROR \
-   (UINT32_C(0x7) << 9)
#define RX_PKT_V3_CMPL_HI_ERRORS_T_PKT_ERROR_LAST \
-   RX_PKT_V3_CMPL_HI_ERRORS_T_PKT_ERROR_T_L4_CS_ERROR
+   RX_PKT_V3_CMPL_HI_ERRORS_T_PKT_ERROR_T_TOTAL_ERROR
/*
 * This indicates that there was an error in the inner
 * portion of the packet when this
@@ -6406,20 +6401,8 @@ struct rx_pkt_v3_cmpl_hi {
 */
#define RX_PKT_V3_CMPL_HI_ERRORS_PKT_ERROR_L4_BAD_OPT_LEN \
(UINT32_C(0x8) << 12)
-   /*
-* Indicates that the IP checksum failed its check in the
-* inner header.
-*/
-   #define RX_PKT_V3_CMPL_HI_ERRORS_PKT_ERROR_IP_CS_ERROR \
-   (UINT32_C(0x9) << 12)
-   /*
-* Indicates that the L4 checksum failed its check in the
-* inner header.
-*/
-   #define RX_PKT_V3_CMPL_HI_ERRORS_PKT_ERROR_L4_CS_ERROR \
-   (UINT32_C(0xa) << 12)
#define RX_PKT_V3_CMPL_HI_ERRORS_PKT_ERROR_LAST \
-   RX_PKT_V3_CMPL_HI_ERRORS_PKT_ERROR_L4_CS_ERROR
+   RX_PKT_V3_CMPL_HI_ERRORS_PKT_ERROR_L4_BAD_OPT_LEN
/*
 * This is data from the CFA block as indicated by the meta_format
 * field.
@@ -14157,7 +14140,7 @@ struct hwrm_func_qcaps_input {
uint8_t unused_0[6];
 } __rte_packed;
 
-/* hwrm_func_qcaps_output (size:896b/112B) */
+/* hwrm_func_qcaps_output (size:1088b/136B) */
 struct hwrm_func_qcaps_output {
/* The specific error status for the command. */
uint16_terror_code;
@@ -14840,9 +14823,85 @@ struct hwrm_func_qcaps_output {
/*
 * When this bit is '1', it indicates that the hardware based
 * link aggregation group (L2 and RoCE) feature is supported.
+* This LAG feature is only supported on the THOR2 or newer NIC
+* with multiple ports.
 */
#define HWRM_FUNC_QCAPS_OUTPUT_FLAGS_EXT2_HW_LAG_SUPPORTED \
UINT32_C(0x400)
+   /*
+* When this bit is '1', it indicates all contexts can be stored
+* on chip instead of using host based backing store memory.
+*/
+   #define HWRM_FUNC_QCAPS_OUTPUT_FLAGS_EXT2_ON_CHIP_CTX_SUPPORTED \
+   UINT32_C(0x800)
+   /*
+* When this bit is '1', it indicates that the HW supports
+* using a steering tag in the memory transactions targeting
+* L2 or RoCE ring resources.
+* Steering Tags are system-specific values that must follow the
+* encoding requirements of the hardware platform. On devices that
+* support steering to multiple address domains, a value of 0 in
+* bit 0 of the steering tag specifies the address is associated
+* with the SOC address space, and a value of 1 indicates t

[PATCH v3 03/14] net/bnxt: log a message when multicast promisc mode changes

2023-12-11 Thread Ajit Khaparde
From: Kalesh AP 

When the user tries to add more Mcast MAC addresses than are
supported by the port, the driver puts the port into Mcast promiscuous mode.
It may be useful to the user to know that Mcast promiscuous mode is
turned on.

Similarly added a log when Mcast promiscuous mode is turned off.

Signed-off-by: Kalesh AP 
Reviewed-by: Somnath Kotur 
Reviewed-by: Ajit Khaparde 
---
 drivers/net/bnxt/bnxt_ethdev.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index acf7e6e46e..f398838ea8 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -2931,12 +2931,18 @@ bnxt_dev_set_mc_addr_list_op(struct rte_eth_dev 
*eth_dev,
bp->nb_mc_addr = nb_mc_addr;
 
if (nb_mc_addr > BNXT_MAX_MC_ADDRS) {
+   PMD_DRV_LOG(INFO, "Number of Mcast MACs added (%u) exceeded Max 
supported (%u)\n",
+   nb_mc_addr, BNXT_MAX_MC_ADDRS);
+   PMD_DRV_LOG(INFO, "Turning on Mcast promiscuous mode\n");
vnic->flags |= BNXT_VNIC_INFO_ALLMULTI;
goto allmulti;
}
 
/* TODO Check for Duplicate mcast addresses */
-   vnic->flags &= ~BNXT_VNIC_INFO_ALLMULTI;
+   if (vnic->flags & BNXT_VNIC_INFO_ALLMULTI) {
+   PMD_DRV_LOG(INFO, "Turning off Mcast promiscuous mode\n");
+   vnic->flags &= ~BNXT_VNIC_INFO_ALLMULTI;
+   }
for (i = 0; i < nb_mc_addr; i++)
rte_ether_addr_copy(&mc_addr_set[i], &bp->mcast_addr_list[i]);
 
-- 
2.39.2 (Apple Git-143)



smime.p7s
Description: S/MIME Cryptographic Signature


[PATCH v3 04/14] net/bnxt: use the correct COS queue for Tx

2023-12-11 Thread Ajit Khaparde
Earlier the firmware was configuring a single lossy COS profile for Tx.
But now more than one profile is possible.
Identify the profile a NIC driver should use based on the profile type
hint provided in queue_cfg_info.

If the firmware does not set the bit to use profile type,
then we will use the older method to pick the COS queue for Tx.

Signed-off-by: Ajit Khaparde 
Reviewed-by: Somnath Kotur 
---
 drivers/net/bnxt/bnxt.h  |  1 +
 drivers/net/bnxt/bnxt_hwrm.c | 56 ++--
 drivers/net/bnxt/bnxt_hwrm.h |  7 +
 3 files changed, 62 insertions(+), 2 deletions(-)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index 0e01b1d4ba..542ef13f7c 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -311,6 +311,7 @@ struct bnxt_link_info {
 struct bnxt_cos_queue_info {
uint8_t id;
uint8_t profile;
+   uint8_t profile_type;
 };
 
 struct rte_flow {
diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
index 0a31b984e6..fe9e629892 100644
--- a/drivers/net/bnxt/bnxt_hwrm.c
+++ b/drivers/net/bnxt/bnxt_hwrm.c
@@ -1544,7 +1544,7 @@ int bnxt_hwrm_port_phy_qcaps(struct bnxt *bp)
return 0;
 }
 
-static bool bnxt_find_lossy_profile(struct bnxt *bp)
+static bool _bnxt_find_lossy_profile(struct bnxt *bp)
 {
int i = 0;
 
@@ -1558,6 +1558,41 @@ static bool bnxt_find_lossy_profile(struct bnxt *bp)
return false;
 }
 
+static bool _bnxt_find_lossy_nic_profile(struct bnxt *bp)
+{
+   int i = 0, j = 0;
+
+   for (i = 0; i < BNXT_COS_QUEUE_COUNT; i++) {
+   for (j = 0; j < BNXT_COS_QUEUE_COUNT; j++) {
+   if (bp->tx_cos_queue[i].profile ==
+   HWRM_QUEUE_SERVICE_PROFILE_LOSSY &&
+   bp->tx_cos_queue[j].profile_type ==
+   HWRM_QUEUE_SERVICE_PROFILE_TYPE_NIC) {
+   bp->tx_cosq_id[0] = bp->tx_cos_queue[i].id;
+   return true;
+   }
+   }
+   }
+   return false;
+}
+
+static bool bnxt_find_lossy_profile(struct bnxt *bp, bool use_prof_type)
+{
+   int i;
+
+   for (i = 0; i < BNXT_COS_QUEUE_COUNT; i++) {
+   PMD_DRV_LOG(DEBUG, "profile %d, profile_id %d, type %d\n",
+   bp->tx_cos_queue[i].profile,
+   bp->tx_cos_queue[i].id,
+   bp->tx_cos_queue[i].profile_type);
+   }
+
+   if (use_prof_type)
+   return _bnxt_find_lossy_nic_profile(bp);
+   else
+   return _bnxt_find_lossy_profile(bp);
+}
+
 static void bnxt_find_first_valid_profile(struct bnxt *bp)
 {
int i = 0;
@@ -1579,6 +1614,7 @@ int bnxt_hwrm_queue_qportcfg(struct bnxt *bp)
struct hwrm_queue_qportcfg_input req = {.req_type = 0 };
struct hwrm_queue_qportcfg_output *resp = bp->hwrm_cmd_resp_addr;
uint32_t dir = HWRM_QUEUE_QPORTCFG_INPUT_FLAGS_PATH_TX;
+   bool use_prof_type = false;
int i;
 
 get_rx_info:
@@ -1590,10 +1626,15 @@ int bnxt_hwrm_queue_qportcfg(struct bnxt *bp)
!(bp->vnic_cap_flags & BNXT_VNIC_CAP_COS_CLASSIFY))
req.drv_qmap_cap =
HWRM_QUEUE_QPORTCFG_INPUT_DRV_QMAP_CAP_ENABLED;
+
rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
HWRM_CHECK_RESULT();
 
+   if (resp->queue_cfg_info &
+   HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_CFG_INFO_USE_PROFILE_TYPE)
+   use_prof_type = true;
+
if (dir == HWRM_QUEUE_QPORTCFG_INPUT_FLAGS_PATH_TX) {
GET_TX_QUEUE_INFO(0);
GET_TX_QUEUE_INFO(1);
@@ -1603,6 +1644,16 @@ int bnxt_hwrm_queue_qportcfg(struct bnxt *bp)
GET_TX_QUEUE_INFO(5);
GET_TX_QUEUE_INFO(6);
GET_TX_QUEUE_INFO(7);
+   if (use_prof_type) {
+   GET_TX_QUEUE_TYPE_INFO(0);
+   GET_TX_QUEUE_TYPE_INFO(1);
+   GET_TX_QUEUE_TYPE_INFO(2);
+   GET_TX_QUEUE_TYPE_INFO(3);
+   GET_TX_QUEUE_TYPE_INFO(4);
+   GET_TX_QUEUE_TYPE_INFO(5);
+   GET_TX_QUEUE_TYPE_INFO(6);
+   GET_TX_QUEUE_TYPE_INFO(7);
+   }
} else  {
GET_RX_QUEUE_INFO(0);
GET_RX_QUEUE_INFO(1);
@@ -1636,11 +1687,12 @@ int bnxt_hwrm_queue_qportcfg(struct bnxt *bp)
 * operations, ideally we should look to use LOSSY.
 * If not found, fallback to the first valid profile
 */
-   if (!bnxt_find_lossy_profile(bp))
+   if (!bnxt_find_lossy_profile(bp, use_prof_type))
bnxt_find_first_valid_profile(bp);
 
}
}
+   PMD_DRV_LOG(DEBUG, "Tx COS Queue ID %d\n", bp

[PATCH v3 05/14] net/bnxt: refactor mem zone allocation

2023-12-11 Thread Ajit Khaparde
Currently we are allocating memzone for VNIC attributes per VNIC.
In cases where the firmware supports a higher VNIC count, this could
lead to a higher number of memzone segments than supported.

Allocate the memzone for VNIC attributes per function instead of per
VNIC, and divide it among the VNICs as needed.

Signed-off-by: Ajit Khaparde 
Reviewed-by: Somnath Kotur 
Reviewed-by: Kalesh AP 
---
 drivers/net/bnxt/bnxt.h  |  1 +
 drivers/net/bnxt/bnxt_vnic.c | 52 +++-
 drivers/net/bnxt/bnxt_vnic.h |  1 -
 3 files changed, 28 insertions(+), 26 deletions(-)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index 542ef13f7c..6af668e92f 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -772,6 +772,7 @@ struct bnxt {
 
struct bnxt_vnic_info   *vnic_info;
STAILQ_HEAD(, bnxt_vnic_info)   free_vnic_list;
+   const struct rte_memzone *vnic_rss_mz;
 
struct bnxt_filter_info *filter_info;
STAILQ_HEAD(, bnxt_filter_info) free_filter_list;
diff --git a/drivers/net/bnxt/bnxt_vnic.c b/drivers/net/bnxt/bnxt_vnic.c
index f86d27fd79..d40daf631e 100644
--- a/drivers/net/bnxt/bnxt_vnic.c
+++ b/drivers/net/bnxt/bnxt_vnic.c
@@ -123,13 +123,11 @@ void bnxt_free_vnic_attributes(struct bnxt *bp)
 
for (i = 0; i < bp->max_vnics; i++) {
vnic = &bp->vnic_info[i];
-   if (vnic->rss_mz != NULL) {
-   rte_memzone_free(vnic->rss_mz);
-   vnic->rss_mz = NULL;
-   vnic->rss_hash_key = NULL;
-   vnic->rss_table = NULL;
-   }
+   vnic->rss_hash_key = NULL;
+   vnic->rss_table = NULL;
}
+   rte_memzone_free(bp->vnic_rss_mz);
+   bp->vnic_rss_mz = NULL;
 }
 
 int bnxt_alloc_vnic_attributes(struct bnxt *bp, bool reconfig)
@@ -153,31 +151,35 @@ int bnxt_alloc_vnic_attributes(struct bnxt *bp, bool 
reconfig)
 
entry_length = RTE_CACHE_LINE_ROUNDUP(entry_length + rss_table_size);
 
-   for (i = 0; i < bp->max_vnics; i++) {
-   vnic = &bp->vnic_info[i];
-
-   snprintf(mz_name, RTE_MEMZONE_NAMESIZE,
-"bnxt_" PCI_PRI_FMT "_vnicattr_%d", pdev->addr.domain,
-pdev->addr.bus, pdev->addr.devid, pdev->addr.function, 
i);
-   mz_name[RTE_MEMZONE_NAMESIZE - 1] = 0;
-   mz = rte_memzone_lookup(mz_name);
-   if (mz == NULL) {
-   mz = rte_memzone_reserve(mz_name,
-entry_length,
+   snprintf(mz_name, RTE_MEMZONE_NAMESIZE,
+"bnxt_" PCI_PRI_FMT "_vnicattr", pdev->addr.domain,
+pdev->addr.bus, pdev->addr.devid, pdev->addr.function);
+   mz_name[RTE_MEMZONE_NAMESIZE - 1] = 0;
+   mz = rte_memzone_lookup(mz_name);
+   if (mz == NULL) {
+   mz = rte_memzone_reserve_aligned(mz_name,
+entry_length * bp->max_vnics,
 bp->eth_dev->device->numa_node,
 RTE_MEMZONE_2MB |
 RTE_MEMZONE_SIZE_HINT_ONLY |
-RTE_MEMZONE_IOVA_CONTIG);
-   if (mz == NULL) {
-   PMD_DRV_LOG(ERR, "Cannot allocate bnxt 
vnic_attributes memory\n");
-   return -ENOMEM;
-   }
+RTE_MEMZONE_IOVA_CONTIG,
+BNXT_PAGE_SIZE);
+   if (mz == NULL) {
+   PMD_DRV_LOG(ERR,
+   "Cannot allocate vnic_attributes memory\n");
+   return -ENOMEM;
}
-   vnic->rss_mz = mz;
-   mz_phys_addr = mz->iova;
+   }
+   bp->vnic_rss_mz = mz;
+   for (i = 0; i < bp->max_vnics; i++) {
+   uint32_t offset = entry_length * i;
+
+   vnic = &bp->vnic_info[i];
+
+   mz_phys_addr = mz->iova + offset;
 
/* Allocate rss table and hash key */
-   vnic->rss_table = (void *)((char *)mz->addr);
+   vnic->rss_table = (void *)((char *)mz->addr + offset);
vnic->rss_table_dma_addr = mz_phys_addr;
memset(vnic->rss_table, -1, entry_length);
 
diff --git a/drivers/net/bnxt/bnxt_vnic.h b/drivers/net/bnxt/bnxt_vnic.h
index 4396d95bda..7a6a0aa739 100644
--- a/drivers/net/bnxt/bnxt_vnic.h
+++ b/drivers/net/bnxt/bnxt_vnic.h
@@ -47,7 +47,6 @@ struct bnxt_vnic_info {
uint16_thash_type;
uint8_t hash_mode;
uint8_t prev_hash_mode;
-   const struct rte_memzone *rss_mz;
rte_iova_t  rss_table_dma_addr;
uint16_t*rss_table;

[PATCH v3 06/14] net/bnxt: add support for p7 device family

2023-12-11 Thread Ajit Khaparde
Add support for the P7 device family.

Signed-off-by: Ajit Khaparde 
---
 drivers/net/bnxt/bnxt.h| 14 --
 drivers/net/bnxt/bnxt_ethdev.c | 25 +
 2 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index 6af668e92f..3a1d8a6ff6 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -72,6 +72,11 @@
 #define BROADCOM_DEV_ID_58814  0xd814
 #define BROADCOM_DEV_ID_58818  0xd818
 #define BROADCOM_DEV_ID_58818_VF   0xd82e
+#define BROADCOM_DEV_ID_57608  0x1760
+#define BROADCOM_DEV_ID_57604  0x1761
+#define BROADCOM_DEV_ID_57602  0x1762
+#define BROADCOM_DEV_ID_57601  0x1763
+#define BROADCOM_DEV_ID_5760X_VF   0x1819
 
 #define BROADCOM_DEV_957508_N2100  0x5208
 #define BROADCOM_DEV_957414_N225   0x4145
@@ -685,6 +690,7 @@ struct bnxt {
 #define BNXT_FLAG_FLOW_XSTATS_EN   BIT(25)
 #define BNXT_FLAG_DFLT_MAC_SET BIT(26)
 #define BNXT_FLAG_GFID_ENABLE  BIT(27)
+#define BNXT_FLAG_CHIP_P7  BIT(30)
 #define BNXT_PF(bp)(!((bp)->flags & BNXT_FLAG_VF))
 #define BNXT_VF(bp)((bp)->flags & BNXT_FLAG_VF)
 #define BNXT_NPAR(bp)  ((bp)->flags & BNXT_FLAG_NPAR_PF)
@@ -694,12 +700,16 @@ struct bnxt {
 #define BNXT_USE_KONG(bp)  ((bp)->flags & BNXT_FLAG_KONG_MB_EN)
 #define BNXT_VF_IS_TRUSTED(bp) ((bp)->flags & BNXT_FLAG_TRUSTED_VF_EN)
 #define BNXT_CHIP_P5(bp)   ((bp)->flags & BNXT_FLAG_CHIP_P5)
+#define BNXT_CHIP_P7(bp)   ((bp)->flags & BNXT_FLAG_CHIP_P7)
+#define BNXT_CHIP_P5_P7(bp)(BNXT_CHIP_P5(bp) || BNXT_CHIP_P7(bp))
 #define BNXT_STINGRAY(bp)  ((bp)->flags & BNXT_FLAG_STINGRAY)
-#define BNXT_HAS_NQ(bp)BNXT_CHIP_P5(bp)
-#define BNXT_HAS_RING_GRPS(bp) (!BNXT_CHIP_P5(bp))
+#define BNXT_HAS_NQ(bp)BNXT_CHIP_P5_P7(bp)
+#define BNXT_HAS_RING_GRPS(bp) (!BNXT_CHIP_P5_P7(bp))
 #define BNXT_FLOW_XSTATS_EN(bp)((bp)->flags & BNXT_FLAG_FLOW_XSTATS_EN)
 #define BNXT_HAS_DFLT_MAC_SET(bp)  ((bp)->flags & BNXT_FLAG_DFLT_MAC_SET)
 #define BNXT_GFID_ENABLED(bp)  ((bp)->flags & BNXT_FLAG_GFID_ENABLE)
+#define BNXT_P7_MAX_NQ_RING_CNT512
+#define BNXT_P7_CQ_MAX_L2_ENT  8192
 
uint32_tflags2;
 #define BNXT_FLAGS2_PTP_TIMESYNC_ENABLED   BIT(0)
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index f398838ea8..bd30e9fd3e 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -84,6 +84,11 @@ static const struct rte_pci_id bnxt_pci_id_map[] = {
{ RTE_PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_58814) },
{ RTE_PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_58818) },
{ RTE_PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_58818_VF) },
+   { RTE_PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57608) },
+   { RTE_PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57604) },
+   { RTE_PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57602) },
+   { RTE_PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_57601) },
+   { RTE_PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, BROADCOM_DEV_ID_5760X_VF) },
{ .vendor_id = 0, /* sentinel */ },
 };
 
@@ -4681,6 +4686,7 @@ static bool bnxt_vf_pciid(uint16_t device_id)
case BROADCOM_DEV_ID_57500_VF1:
case BROADCOM_DEV_ID_57500_VF2:
case BROADCOM_DEV_ID_58818_VF:
+   case BROADCOM_DEV_ID_5760X_VF:
/* FALLTHROUGH */
return true;
default:
@@ -4706,7 +4712,23 @@ static bool bnxt_p5_device(uint16_t device_id)
case BROADCOM_DEV_ID_58812:
case BROADCOM_DEV_ID_58814:
case BROADCOM_DEV_ID_58818:
+   /* FALLTHROUGH */
+   return true;
+   default:
+   return false;
+   }
+}
+
+/* Phase 7 device */
+static bool bnxt_p7_device(uint16_t device_id)
+{
+   switch (device_id) {
case BROADCOM_DEV_ID_58818_VF:
+   case BROADCOM_DEV_ID_57608:
+   case BROADCOM_DEV_ID_57604:
+   case BROADCOM_DEV_ID_57602:
+   case BROADCOM_DEV_ID_57601:
+   case BROADCOM_DEV_ID_5760X_VF:
/* FALLTHROUGH */
return true;
default:
@@ -5874,6 +5896,9 @@ static int bnxt_drv_init(struct rte_eth_dev *eth_dev)
if (bnxt_p5_device(pci_dev->id.device_id))
bp->flags |= BNXT_FLAG_CHIP_P5;
 
+   if (bnxt_p7_device(pci_dev->id.device_id))
+   bp->flags |= BNXT_FLAG_CHIP_P7;
+
if (pci_dev->id.device_id == BROADCOM_DEV_ID_58802 ||
pci_dev->id.device_id == BROADCOM_DEV_ID_58804 ||
pci_dev->id.device_id == BROADCOM_DEV_ID_58808 ||
-- 
2.39.2 (Apple Git-143)



smime.p7s
Description: S/MIME Cryptographic Signature


[PATCH v3 07/14] net/bnxt: refactor code to support P7 devices

2023-12-11 Thread Ajit Khaparde
Refactor code to support the P7 device family.
The changes include support for RSS, VNIC allocation, TPA.
Remove unnecessary check to disable vector mode support for
some device families.

Signed-off-by: Ajit Khaparde 
---
 drivers/net/bnxt/bnxt.h|  6 +++---
 drivers/net/bnxt/bnxt_ethdev.c | 29 +
 drivers/net/bnxt/bnxt_flow.c   |  2 +-
 drivers/net/bnxt/bnxt_hwrm.c   | 26 ++
 drivers/net/bnxt/bnxt_ring.c   |  6 +++---
 drivers/net/bnxt/bnxt_rxq.c|  2 +-
 drivers/net/bnxt/bnxt_rxr.c|  6 +++---
 drivers/net/bnxt/bnxt_vnic.c   |  6 +++---
 8 files changed, 37 insertions(+), 46 deletions(-)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index 3a1d8a6ff6..7439ecf4fa 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -107,11 +107,11 @@
 #define TPA_MAX_SEGS   5 /* 32 segments in log2 units */
 
 #define BNXT_TPA_MAX_AGGS(bp) \
-   (BNXT_CHIP_P5(bp) ? TPA_MAX_AGGS_TH : \
+   (BNXT_CHIP_P5_P7(bp) ? TPA_MAX_AGGS_TH : \
 TPA_MAX_AGGS)
 
 #define BNXT_TPA_MAX_SEGS(bp) \
-   (BNXT_CHIP_P5(bp) ? TPA_MAX_SEGS_TH : \
+   (BNXT_CHIP_P5_P7(bp) ? TPA_MAX_SEGS_TH : \
  TPA_MAX_SEGS)
 
 /*
@@ -938,7 +938,7 @@ inline uint16_t bnxt_max_rings(struct bnxt *bp)
 * RSS table size in P5 is 512.
 * Cap max Rx rings to the same value for RSS.
 */
-   if (BNXT_CHIP_P5(bp))
+   if (BNXT_CHIP_P5_P7(bp))
max_rx_rings = RTE_MIN(max_rx_rings, BNXT_RSS_TBL_SIZE_P5);
 
max_tx_rings = RTE_MIN(max_tx_rings, max_rx_rings);
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index bd30e9fd3e..d79396b009 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -212,7 +212,7 @@ uint16_t bnxt_rss_ctxts(const struct bnxt *bp)
unsigned int num_rss_rings = RTE_MIN(bp->rx_nr_rings,
 BNXT_RSS_TBL_SIZE_P5);
 
-   if (!BNXT_CHIP_P5(bp))
+   if (!BNXT_CHIP_P5_P7(bp))
return 1;
 
return RTE_ALIGN_MUL_CEIL(num_rss_rings,
@@ -222,7 +222,7 @@ uint16_t bnxt_rss_ctxts(const struct bnxt *bp)
 
 uint16_t bnxt_rss_hash_tbl_size(const struct bnxt *bp)
 {
-   if (!BNXT_CHIP_P5(bp))
+   if (!BNXT_CHIP_P5_P7(bp))
return HW_HASH_INDEX_SIZE;
 
return bnxt_rss_ctxts(bp) * BNXT_RSS_ENTRIES_PER_CTX_P5;
@@ -765,7 +765,7 @@ static int bnxt_start_nic(struct bnxt *bp)
/* P5 does not support ring groups.
 * But we will use the array to save RSS context IDs.
 */
-   if (BNXT_CHIP_P5(bp))
+   if (BNXT_CHIP_P5_P7(bp))
bp->max_ring_grps = BNXT_MAX_RSS_CTXTS_P5;
 
rc = bnxt_vnic_queue_db_init(bp);
@@ -1247,12 +1247,6 @@ bnxt_receive_function(struct rte_eth_dev *eth_dev)
 {
struct bnxt *bp = eth_dev->data->dev_private;
 
-   /* Disable vector mode RX for Stingray2 for now */
-   if (BNXT_CHIP_SR2(bp)) {
-   bp->flags &= ~BNXT_FLAG_RX_VECTOR_PKT_MODE;
-   return bnxt_recv_pkts;
-   }
-
 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
/* Vector mode receive cannot be enabled if scattered rx is in use. */
if (eth_dev->data->scattered_rx)
@@ -1317,16 +1311,11 @@ bnxt_receive_function(struct rte_eth_dev *eth_dev)
 }
 
 static eth_tx_burst_t
-bnxt_transmit_function(struct rte_eth_dev *eth_dev)
+bnxt_transmit_function(__rte_unused struct rte_eth_dev *eth_dev)
 {
-   struct bnxt *bp = eth_dev->data->dev_private;
-
-   /* Disable vector mode TX for Stingray2 for now */
-   if (BNXT_CHIP_SR2(bp))
-   return bnxt_xmit_pkts;
-
 #if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
uint64_t offloads = eth_dev->data->dev_conf.txmode.offloads;
+   struct bnxt *bp = eth_dev->data->dev_private;
 
/*
 * Vector mode transmit can be enabled only if not using scatter rx
@@ -2091,7 +2080,7 @@ static int bnxt_reta_update_op(struct rte_eth_dev 
*eth_dev,
continue;
 
rxq = bnxt_qid_to_rxq(bp, reta_conf[idx].reta[sft]);
-   if (BNXT_CHIP_P5(bp)) {
+   if (BNXT_CHIP_P5_P7(bp)) {
vnic->rss_table[i * 2] =
rxq->rx_ring->rx_ring_struct->fw_ring_id;
vnic->rss_table[i * 2 + 1] =
@@ -2138,7 +2127,7 @@ static int bnxt_reta_query_op(struct rte_eth_dev *eth_dev,
if (reta_conf[idx].mask & (1ULL << sft)) {
uint16_t qid;
 
-   if (BNXT_CHIP_P5(bp))
+   if (BNXT_CHIP_P5_P7(bp))
qid = bnxt_rss_to_qid(bp,
  vnic->rss_table[i * 2]);
else
@@ -3224,7 +3213,7 @@ bnxt_rx_queue_count_op(void *rx_queue)
break;
 

[PATCH v3 08/14] net/bnxt: fix array overflow

2023-12-11 Thread Ajit Khaparde
In some cases the number of elements in the context memory array
can exceed the MAX_CTX_PAGES and that can cause the static members
ctx_pg_arr and ctx_dma_arr to overflow.
Allocate them dynamically to prevent this overflow.

Cc: sta...@dpdk.org
Fixes: f8168ca0e690 ("net/bnxt: support thor controller")
Signed-off-by: Ajit Khaparde 
Reviewed-by: Damodharam Ammepalli 
---
 drivers/net/bnxt/bnxt.h|  4 ++--
 drivers/net/bnxt/bnxt_ethdev.c | 42 +++---
 2 files changed, 36 insertions(+), 10 deletions(-)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index 7439ecf4fa..3fbdf1ddcc 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -455,8 +455,8 @@ struct bnxt_ring_mem_info {
 
 struct bnxt_ctx_pg_info {
uint32_tentries;
-   void*ctx_pg_arr[MAX_CTX_PAGES];
-   rte_iova_t  ctx_dma_arr[MAX_CTX_PAGES];
+   void**ctx_pg_arr;
+   rte_iova_t  *ctx_dma_arr;
struct bnxt_ring_mem_info ring_mem;
 };
 
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index d79396b009..95f9dd1aa1 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -4767,7 +4767,7 @@ static int bnxt_alloc_ctx_mem_blk(struct bnxt *bp,
 {
struct bnxt_ring_mem_info *rmem = &ctx_pg->ring_mem;
const struct rte_memzone *mz = NULL;
-   char mz_name[RTE_MEMZONE_NAMESIZE];
+   char name[RTE_MEMZONE_NAMESIZE];
rte_iova_t mz_phys_addr;
uint64_t valid_bits = 0;
uint32_t sz;
@@ -4779,6 +4779,19 @@ static int bnxt_alloc_ctx_mem_blk(struct bnxt *bp,
rmem->nr_pages = RTE_ALIGN_MUL_CEIL(mem_size, BNXT_PAGE_SIZE) /
 BNXT_PAGE_SIZE;
rmem->page_size = BNXT_PAGE_SIZE;
+
+   snprintf(name, RTE_MEMZONE_NAMESIZE, "bnxt_ctx_pg_arr%s_%x_%d",
+suffix, idx, bp->eth_dev->data->port_id);
+   ctx_pg->ctx_pg_arr = rte_zmalloc(name, sizeof(void *) * rmem->nr_pages, 
0);
+   if (ctx_pg->ctx_pg_arr == NULL)
+   return -ENOMEM;
+
+   snprintf(name, RTE_MEMZONE_NAMESIZE, "bnxt_ctx_dma_arr%s_%x_%d",
+suffix, idx, bp->eth_dev->data->port_id);
+   ctx_pg->ctx_dma_arr = rte_zmalloc(name, sizeof(rte_iova_t *) * 
rmem->nr_pages, 0);
+   if (ctx_pg->ctx_dma_arr == NULL)
+   return -ENOMEM;
+
rmem->pg_arr = ctx_pg->ctx_pg_arr;
rmem->dma_arr = ctx_pg->ctx_dma_arr;
rmem->flags = BNXT_RMEM_VALID_PTE_FLAG;
@@ -4786,13 +4799,13 @@ static int bnxt_alloc_ctx_mem_blk(struct bnxt *bp,
valid_bits = PTU_PTE_VALID;
 
if (rmem->nr_pages > 1) {
-   snprintf(mz_name, RTE_MEMZONE_NAMESIZE,
+   snprintf(name, RTE_MEMZONE_NAMESIZE,
 "bnxt_ctx_pg_tbl%s_%x_%d",
 suffix, idx, bp->eth_dev->data->port_id);
-   mz_name[RTE_MEMZONE_NAMESIZE - 1] = 0;
-   mz = rte_memzone_lookup(mz_name);
+   name[RTE_MEMZONE_NAMESIZE - 1] = 0;
+   mz = rte_memzone_lookup(name);
if (!mz) {
-   mz = rte_memzone_reserve_aligned(mz_name,
+   mz = rte_memzone_reserve_aligned(name,
rmem->nr_pages * 8,
bp->eth_dev->device->numa_node,
RTE_MEMZONE_2MB |
@@ -4811,11 +4824,11 @@ static int bnxt_alloc_ctx_mem_blk(struct bnxt *bp,
rmem->pg_tbl_mz = mz;
}
 
-   snprintf(mz_name, RTE_MEMZONE_NAMESIZE, "bnxt_ctx_%s_%x_%d",
+   snprintf(name, RTE_MEMZONE_NAMESIZE, "bnxt_ctx_%s_%x_%d",
 suffix, idx, bp->eth_dev->data->port_id);
-   mz = rte_memzone_lookup(mz_name);
+   mz = rte_memzone_lookup(name);
if (!mz) {
-   mz = rte_memzone_reserve_aligned(mz_name,
+   mz = rte_memzone_reserve_aligned(name,
 mem_size,
 bp->eth_dev->device->numa_node,
 RTE_MEMZONE_1GB |
@@ -4861,6 +4874,17 @@ static void bnxt_free_ctx_mem(struct bnxt *bp)
return;
 
bp->ctx->flags &= ~BNXT_CTX_FLAG_INITED;
+   rte_free(bp->ctx->qp_mem.ctx_pg_arr);
+   rte_free(bp->ctx->srq_mem.ctx_pg_arr);
+   rte_free(bp->ctx->cq_mem.ctx_pg_arr);
+   rte_free(bp->ctx->vnic_mem.ctx_pg_arr);
+   rte_free(bp->ctx->stat_mem.ctx_pg_arr);
+   rte_free(bp->ctx->qp_mem.ctx_dma_arr);
+   rte_free(bp->ctx->srq_mem.ctx_dma_arr);
+   rte_free(bp->ctx->cq_mem.ctx_dma_arr);
+   rte_free(bp->ctx->vnic_mem.ctx_dma_arr);
+   rte_free(bp->ctx->stat_mem.ctx_dma_arr);
+
rte_memzone_free(bp->ctx->qp_mem.ring_mem.mz);
rte_memzone_free(bp->ctx->srq_mem.ring_mem.mz);
rte_memzone_free(bp->ctx->cq_mem.ri

[PATCH v3 09/14] net/bnxt: add support for backing store v2

2023-12-11 Thread Ajit Khaparde
Add backing store v2 changes.
The firmware supports the new backing store scheme for P7
and newer devices.

To support this, the driver queries the different types of chip
contexts the firmware supports and allocates the appropriate size
of memory for the firmware and hardware to use.
The code then goes ahead and frees up the memory during cleanup.

The older P5 device family continues to use version 1 of the
backing store, while the P4 device family does not need any
backing store memory.

Signed-off-by: Ajit Khaparde 
---
 drivers/net/bnxt/bnxt.h|  69 ++-
 drivers/net/bnxt/bnxt_ethdev.c | 177 --
 drivers/net/bnxt/bnxt_hwrm.c   | 319 +++--
 drivers/net/bnxt/bnxt_hwrm.h   |   8 +
 drivers/net/bnxt/bnxt_util.c   |  10 ++
 drivers/net/bnxt/bnxt_util.h   |   1 +
 6 files changed, 545 insertions(+), 39 deletions(-)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index 3fbdf1ddcc..68c4778dc3 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -81,6 +81,11 @@
 #define BROADCOM_DEV_957508_N2100  0x5208
 #define BROADCOM_DEV_957414_N225   0x4145
 
+#define HWRM_SPEC_CODE_1_8_3   0x10803
+#define HWRM_VERSION_1_9_1 0x10901
+#define HWRM_VERSION_1_9_2 0x10903
+#define HWRM_VERSION_1_10_2_13 0x10a020d
+
 #define BNXT_MAX_MTU   9574
 #define BNXT_NUM_VLANS 2
 #define BNXT_MAX_PKT_LEN   (BNXT_MAX_MTU + RTE_ETHER_HDR_LEN +\
@@ -430,16 +435,26 @@ struct bnxt_coal {
 #define BNXT_PAGE_SIZE (1 << BNXT_PAGE_SHFT)
 #define MAX_CTX_PAGES  (BNXT_PAGE_SIZE / 8)
 
+#define BNXT_RTE_MEMZONE_FLAG  (RTE_MEMZONE_1GB | RTE_MEMZONE_IOVA_CONTIG)
+
 #define PTU_PTE_VALID 0x1UL
 #define PTU_PTE_LAST  0x2UL
 #define PTU_PTE_NEXT_TO_LAST  0x4UL
 
+#define BNXT_CTX_MIN   1
+#define BNXT_CTX_INV   0x
+
+#define BNXT_CTX_INIT_VALID(flags) \
+   ((flags) &  \
+HWRM_FUNC_BACKING_STORE_QCAPS_V2_OUTPUT_FLAGS_ENABLE_CTX_KIND_INIT)
+
 struct bnxt_ring_mem_info {
int nr_pages;
int page_size;
uint32_tflags;
 #define BNXT_RMEM_VALID_PTE_FLAG   1
 #define BNXT_RMEM_RING_PTE_FLAG2
+#define BNXT_RMEM_USE_FULL_PAGE_FLAG   4
 
void**pg_arr;
rte_iova_t  *dma_arr;
@@ -460,7 +475,50 @@ struct bnxt_ctx_pg_info {
struct bnxt_ring_mem_info ring_mem;
 };
 
+struct bnxt_ctx_mem {
+   uint16_ttype;
+   uint16_tentry_size;
+   uint32_tflags;
+#define BNXT_CTX_MEM_TYPE_VALID \
+   HWRM_FUNC_BACKING_STORE_QCAPS_V2_OUTPUT_FLAGS_TYPE_VALID
+   uint32_tinstance_bmap;
+   uint8_t init_value;
+   uint8_t entry_multiple;
+   uint16_tinit_offset;
+#defineBNXT_CTX_INIT_INVALID_OFFSET0x
+   uint32_tmax_entries;
+   uint32_tmin_entries;
+   uint8_t last:1;
+   uint8_t split_entry_cnt;
+#define BNXT_MAX_SPLIT_ENTRY   4
+   union {
+   struct {
+   uint32_tqp_l2_entries;
+   uint32_tqp_qp1_entries;
+   uint32_tqp_fast_qpmd_entries;
+   };
+   uint32_tsrq_l2_entries;
+   uint32_tcq_l2_entries;
+   uint32_tvnic_entries;
+   struct {
+   uint32_tmrav_av_entries;
+   uint32_tmrav_num_entries_units;
+   };
+   uint32_tsplit[BNXT_MAX_SPLIT_ENTRY];
+   };
+   struct bnxt_ctx_pg_info *pg_info;
+};
+
+#define BNXT_CTX_FLAG_INITED0x01
+
 struct bnxt_ctx_mem_info {
+   struct bnxt_ctx_mem *ctx_arr;
+   uint32_tsupported_types;
+   uint32_tflags;
+   uint16_ttypes;
+   uint8_t tqm_fp_rings_count;
+
+   /* The following are used for V1 */
uint32_tqp_max_entries;
uint16_tqp_min_qp1_entries;
uint16_tqp_max_l2_entries;
@@ -484,10 +542,6 @@ struct bnxt_ctx_mem_info {
uint16_ttim_entry_size;
uint32_ttim_max_entries;
uint8_t tqm_entries_multiple;
-   uint8_t tqm_fp_rings_count;
-
-   uint32_tflags;
-#define BNXT_CTX_FLAG_INITED0x01
 
struct bnxt_ctx_pg_info qp_mem;
struct bnxt_ctx_pg_info srq_mem;
@@ -739,6 +793,13 @@ struct bnxt {
 #define BNXT_FW_CAP_TRUFLOW_EN BIT(8)
 #define BNXT_FW_CAP_VLAN_TX_INSERT BIT(9)
 #define BNXT_FW_CAP_RX_ALL_PKT_TS  BIT(10)
+#define BNXT_FW_CAP_BACKING_STORE_V2   BIT(12)
+#define BNXT_FW_BACKING_STORE_V2_EN(bp)\
+   ((bp)->fw_cap & BNXT_FW_CAP_BACKING_STORE_V2)
+#define BNXT_FW_BACKING_

[PATCH v3 10/14] net/bnxt: refactor the ulp initialization

2023-12-11 Thread Ajit Khaparde
From: Kishore Padmanabha 

Add a new function that evaluates all the conditions that must
be met before the ULP can be initialized.

Signed-off-by: Kishore Padmanabha 
Reviewed-by: Ajit Khaparde 
Reviewed-by: Mike Baucom 
---
 drivers/net/bnxt/bnxt_ethdev.c | 28 +++-
 1 file changed, 23 insertions(+), 5 deletions(-)

diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 004b2df4f4..81a30eb983 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -190,6 +190,7 @@ static void bnxt_dev_recover(void *arg);
 static void bnxt_free_error_recovery_info(struct bnxt *bp);
 static void bnxt_free_rep_info(struct bnxt *bp);
 static int bnxt_check_fw_ready(struct bnxt *bp);
+static bool bnxt_enable_ulp(struct bnxt *bp);
 
 int is_bnxt_in_error(struct bnxt *bp)
 {
@@ -1520,7 +1521,8 @@ static int bnxt_dev_stop(struct rte_eth_dev *eth_dev)
return ret;
 
/* delete the bnxt ULP port details */
-   bnxt_ulp_port_deinit(bp);
+   if (bnxt_enable_ulp(bp))
+   bnxt_ulp_port_deinit(bp);
 
bnxt_cancel_fw_health_check(bp);
 
@@ -1641,9 +1643,11 @@ int bnxt_dev_start_op(struct rte_eth_dev *eth_dev)
goto error;
 
/* Initialize bnxt ULP port details */
-   rc = bnxt_ulp_port_init(bp);
-   if (rc)
-   goto error;
+   if (bnxt_enable_ulp(bp)) {
+   rc = bnxt_ulp_port_init(bp);
+   if (rc)
+   goto error;
+   }
 
eth_dev->rx_pkt_burst = bnxt_receive_function(eth_dev);
eth_dev->tx_pkt_burst = bnxt_transmit_function(eth_dev);
@@ -3426,7 +3430,7 @@ bnxt_flow_ops_get_op(struct rte_eth_dev *dev,
 */
dev->data->dev_flags |= RTE_ETH_DEV_FLOW_OPS_THREAD_SAFE;
 
-   if (BNXT_TRUFLOW_EN(bp))
+   if (bnxt_enable_ulp(bp))
*ops = &bnxt_ulp_rte_flow_ops;
else
*ops = &bnxt_flow_ops;
@@ -,6 +6670,20 @@ struct tf *bnxt_get_tfp_session(struct bnxt *bp, enum 
bnxt_session_type type)
&bp->tfp[BNXT_SESSION_TYPE_REGULAR] : &bp->tfp[type];
 }
 
+/* check if ULP should be enabled or not */
+static bool bnxt_enable_ulp(struct bnxt *bp)
+{
+   /* truflow and MPC should be enabled */
+   /* not enabling ulp for cli and no truflow apps */
+   if (BNXT_TRUFLOW_EN(bp) && bp->app_id != 254 &&
+   bp->app_id != 255) {
+   if (BNXT_CHIP_P7(bp))
+   return false;
+   return true;
+   }
+   return false;
+}
+
 RTE_LOG_REGISTER_SUFFIX(bnxt_logtype_driver, driver, NOTICE);
 RTE_PMD_REGISTER_PCI(net_bnxt, bnxt_rte_pmd);
 RTE_PMD_REGISTER_PCI_TABLE(net_bnxt, bnxt_pci_id_map);
-- 
2.39.2 (Apple Git-143)



smime.p7s
Description: S/MIME Cryptographic Signature


[PATCH v3 11/14] net/bnxt: modify sending new HWRM commands to firmware

2023-12-11 Thread Ajit Khaparde
If the firmware fails to respond to a HWRM command within a certain time,
it may be because the firmware is in a bad state.
Do not send any new HWRM commands in such a scenario.

Signed-off-by: Ajit Khaparde 
Reviewed-by: Damodharam Ammepalli 
---
 drivers/net/bnxt/bnxt.h  | 1 +
 drivers/net/bnxt/bnxt_hwrm.c | 5 +
 2 files changed, 6 insertions(+)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index 68c4778dc3..f7a60eb9a1 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -745,6 +745,7 @@ struct bnxt {
 #define BNXT_FLAG_DFLT_MAC_SET BIT(26)
 #define BNXT_FLAG_GFID_ENABLE  BIT(27)
 #define BNXT_FLAG_CHIP_P7  BIT(30)
+#define BNXT_FLAG_FW_TIMEDOUT  BIT(31)
 #define BNXT_PF(bp)(!((bp)->flags & BNXT_FLAG_VF))
 #define BNXT_VF(bp)((bp)->flags & BNXT_FLAG_VF)
 #define BNXT_NPAR(bp)  ((bp)->flags & BNXT_FLAG_NPAR_PF)
diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
index a2182af036..1cc2c532dd 100644
--- a/drivers/net/bnxt/bnxt_hwrm.c
+++ b/drivers/net/bnxt/bnxt_hwrm.c
@@ -215,6 +215,10 @@ static int bnxt_hwrm_send_message(struct bnxt *bp, void 
*msg,
if (bp->flags & BNXT_FLAG_FATAL_ERROR)
return 0;
 
+   /* If previous HWRM command timed out, donot send new HWRM command */
+   if (bp->flags & BNXT_FLAG_FW_TIMEDOUT)
+   return 0;
+
timeout = bp->hwrm_cmd_timeout;
 
/* Update the message length for backing store config for new FW. */
@@ -315,6 +319,7 @@ static int bnxt_hwrm_send_message(struct bnxt *bp, void 
*msg,
PMD_DRV_LOG(ERR,
"Error(timeout) sending msg 0x%04x, seq_id %d\n",
req->req_type, req->seq_id);
+   bp->flags |= BNXT_FLAG_FW_TIMEDOUT;
return -ETIMEDOUT;
}
return 0;
-- 
2.39.2 (Apple Git-143)



smime.p7s
Description: S/MIME Cryptographic Signature


[PATCH v3 12/14] net/bnxt: retry HWRM ver get if the command fails

2023-12-11 Thread Ajit Khaparde
Retry HWRM ver get if the command times out because of PCI FLR.
When the PCI driver issues an FLR during device initialization,
the firmware may have to block the PXP target traffic till the FLR
is complete.

HWRM_VER_GET command issued during that window may time out.
So retry the command in such a scenario.

Signed-off-by: Ajit Khaparde 
Reviewed-by: Kalesh AP 
Reviewed-by: Somnath Kotur 
---
 drivers/net/bnxt/bnxt.h|  1 +
 drivers/net/bnxt/bnxt_ethdev.c | 12 +++-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index f7a60eb9a1..7aed4c3da3 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -879,6 +879,7 @@ struct bnxt {
 
 /* default command timeout value of 500ms */
 #define DFLT_HWRM_CMD_TIMEOUT  50
+#define PCI_FUNC_RESET_WAIT_TIMEOUT150
 /* short command timeout value of 50ms */
 #define SHORT_HWRM_CMD_TIMEOUT 5
/* default HWRM request timeout value */
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 81a30eb983..75e968394f 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -5441,6 +5441,7 @@ static int bnxt_map_hcomm_fw_status_reg(struct bnxt *bp)
 static int bnxt_get_config(struct bnxt *bp)
 {
uint16_t mtu;
+   int timeout;
int rc = 0;
 
bp->fw_cap = 0;
@@ -5449,8 +5450,17 @@ static int bnxt_get_config(struct bnxt *bp)
if (rc)
return rc;
 
-   rc = bnxt_hwrm_ver_get(bp, DFLT_HWRM_CMD_TIMEOUT);
+   timeout = BNXT_CHIP_P7(bp) ?
+ PCI_FUNC_RESET_WAIT_TIMEOUT :
+ DFLT_HWRM_CMD_TIMEOUT;
+try_again:
+   rc = bnxt_hwrm_ver_get(bp, timeout);
if (rc) {
+   if (rc == -ETIMEDOUT && timeout == PCI_FUNC_RESET_WAIT_TIMEOUT) 
{
+   bp->flags &= ~BNXT_FLAG_FW_TIMEDOUT;
+   timeout = DFLT_HWRM_CMD_TIMEOUT;
+   goto try_again;
+   }
bnxt_check_fw_status(bp);
return rc;
}
-- 
2.39.2 (Apple Git-143)



smime.p7s
Description: S/MIME Cryptographic Signature


[PATCH v3 13/14] net/bnxt: cap ring resources for P7 devices

2023-12-11 Thread Ajit Khaparde
Cap the NQ count for P7 devices.
Driver does not need a high NQ ring count anyway since we operate in
poll mode.

Signed-off-by: Ajit Khaparde 
Reviewed-by: Kalesh AP 
Reviewed-by: Damodharam Ammepalli 
---
 drivers/net/bnxt/bnxt_hwrm.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/bnxt/bnxt_hwrm.c b/drivers/net/bnxt/bnxt_hwrm.c
index 1cc2c532dd..e56f7693af 100644
--- a/drivers/net/bnxt/bnxt_hwrm.c
+++ b/drivers/net/bnxt/bnxt_hwrm.c
@@ -1237,7 +1237,10 @@ int bnxt_hwrm_func_resc_qcaps(struct bnxt *bp)
else
bp->max_vnics = rte_le_to_cpu_16(resp->max_vnics);
bp->max_stat_ctx = rte_le_to_cpu_16(resp->max_stat_ctx);
-   bp->max_nq_rings = rte_le_to_cpu_16(resp->max_msix);
+   if (BNXT_CHIP_P7(bp))
+   bp->max_nq_rings = BNXT_P7_MAX_NQ_RING_CNT;
+   else
+   bp->max_nq_rings = rte_le_to_cpu_16(resp->max_msix);
bp->vf_resv_strategy = rte_le_to_cpu_16(resp->vf_reservation_strategy);
if (bp->vf_resv_strategy >
HWRM_FUNC_RESOURCE_QCAPS_OUTPUT_VF_RESV_STRATEGY_MINIMAL_STATIC)
-- 
2.39.2 (Apple Git-143)



smime.p7s
Description: S/MIME Cryptographic Signature


[PATCH v3 14/14] net/bnxt: add support for v3 Rx completion

2023-12-11 Thread Ajit Khaparde
P7 devices support the newer Rx completion version.
This Rx completion, though similar to that of the previous generation,
provides some extra information for flow offload scenarios
in addition to the normal information.

Signed-off-by: Ajit Khaparde 
---
 drivers/net/bnxt/bnxt_rxr.c | 87 ++-
 drivers/net/bnxt/bnxt_rxr.h | 92 +
 2 files changed, 177 insertions(+), 2 deletions(-)

diff --git a/drivers/net/bnxt/bnxt_rxr.c b/drivers/net/bnxt/bnxt_rxr.c
index 9d45065f28..59ea0121de 100644
--- a/drivers/net/bnxt/bnxt_rxr.c
+++ b/drivers/net/bnxt/bnxt_rxr.c
@@ -553,6 +553,41 @@ bnxt_parse_pkt_type(struct rx_pkt_cmpl *rxcmp, struct 
rx_pkt_cmpl_hi *rxcmp1)
return bnxt_ptype_table[index];
 }
 
+static void
+bnxt_parse_pkt_type_v3(struct rte_mbuf *mbuf,
+  struct rx_pkt_cmpl *rxcmp_v1,
+  struct rx_pkt_cmpl_hi *rxcmp1_v1)
+{
+   uint32_t flags_type, flags2, meta;
+   struct rx_pkt_v3_cmpl_hi *rxcmp1;
+   struct rx_pkt_v3_cmpl *rxcmp;
+   uint8_t index;
+
+   rxcmp = (void *)rxcmp_v1;
+   rxcmp1 = (void *)rxcmp1_v1;
+
+   flags_type = rte_le_to_cpu_16(rxcmp->flags_type);
+   flags2 = rte_le_to_cpu_32(rxcmp1->flags2);
+   meta = rte_le_to_cpu_32(rxcmp->metadata1_payload_offset);
+
+   /* TODO */
+   /* Validate ptype table indexing at build time. */
+   /* bnxt_check_ptype_constants_v3(); */
+
+   /*
+* Index format:
+* bit 0: Set if IP tunnel encapsulated packet.
+* bit 1: Set if IPv6 packet, clear if IPv4.
+* bit 2: Set if VLAN tag present.
+* bits 3-6: Four-bit hardware packet type field.
+*/
+   index = BNXT_CMPL_V3_ITYPE_TO_IDX(flags_type) |
+   BNXT_CMPL_V3_VLAN_TO_IDX(meta) |
+   BNXT_CMPL_V3_IP_VER_TO_IDX(flags2);
+
+   mbuf->packet_type = bnxt_ptype_table[index];
+}
+
 static void __rte_cold
 bnxt_init_ol_flags_tables(struct bnxt_rx_queue *rxq)
 {
@@ -716,6 +751,43 @@ bnxt_get_rx_ts_p5(struct bnxt *bp, uint32_t rx_ts_cmpl)
ptp->rx_timestamp = pkt_time;
 }
 
+static uint32_t
+bnxt_ulp_set_mark_in_mbuf_v3(struct bnxt *bp, struct rx_pkt_cmpl_hi *rxcmp1,
+struct rte_mbuf *mbuf, uint32_t *vfr_flag)
+{
+   struct rx_pkt_v3_cmpl_hi *rxcmp1_v3 = (void *)rxcmp1;
+   uint32_t flags2, meta, mark_id = 0;
+   /* revisit the usage of gfid/lfid if mark action is supported.
+* for now, only VFR is using mark and the metadata is the SVIF
+* (a small number)
+*/
+   bool gfid = false;
+   int rc = 0;
+
+   flags2 = rte_le_to_cpu_32(rxcmp1_v3->flags2);
+
+   switch (flags2 & RX_PKT_V3_CMPL_HI_FLAGS2_META_FORMAT_MASK) {
+   case RX_PKT_V3_CMPL_HI_FLAGS2_META_FORMAT_CHDR_DATA:
+   /* Only supporting Metadata for ulp now */
+   meta = rxcmp1_v3->metadata2;
+   break;
+   default:
+   goto skip_mark;
+   }
+
+   rc = ulp_mark_db_mark_get(bp->ulp_ctx, gfid, meta, vfr_flag, &mark_id);
+   if (!rc) {
+   /* Only supporting VFR for now, no Mark actions */
+   if (vfr_flag && *vfr_flag)
+   return mark_id;
+   }
+
+skip_mark:
+   mbuf->hash.fdir.hi = 0;
+
+   return 0;
+}
+
 static uint32_t
 bnxt_ulp_set_mark_in_mbuf(struct bnxt *bp, struct rx_pkt_cmpl_hi *rxcmp1,
  struct rte_mbuf *mbuf, uint32_t *vfr_flag)
@@ -892,7 +964,8 @@ static int bnxt_rx_pkt(struct rte_mbuf **rx_pkt,
*rx_pkt = mbuf;
goto next_rx;
} else if ((cmp_type != CMPL_BASE_TYPE_RX_L2) &&
-  (cmp_type != CMPL_BASE_TYPE_RX_L2_V2)) {
+  (cmp_type != CMPL_BASE_TYPE_RX_L2_V2) &&
+  (cmp_type != CMPL_BASE_TYPE_RX_L2_V3)) {
rc = -EINVAL;
goto next_rx;
}
@@ -929,6 +1002,16 @@ static int bnxt_rx_pkt(struct rte_mbuf **rx_pkt,
  bp->ptp_all_rx_tstamp)
bnxt_get_rx_ts_p5(rxq->bp, rxcmp1->reorder);
 
+   if (cmp_type == CMPL_BASE_TYPE_RX_L2_V3) {
+   bnxt_parse_csum_v3(mbuf, rxcmp1);
+   bnxt_parse_pkt_type_v3(mbuf, rxcmp, rxcmp1);
+   bnxt_rx_vlan_v3(mbuf, rxcmp, rxcmp1);
+   if (BNXT_TRUFLOW_EN(bp))
+   mark_id = bnxt_ulp_set_mark_in_mbuf_v3(rxq->bp, rxcmp1,
+  mbuf, &vfr_flag);
+   goto reuse_rx_mbuf;
+   }
+
if (cmp_type == CMPL_BASE_TYPE_RX_L2_V2) {
bnxt_parse_csum_v2(mbuf, rxcmp1);
bnxt_parse_pkt_type_v2(mbuf, rxcmp, rxcmp1);
@@ -1066,7 +1149,7 @@ uint16_t bnxt_recv_pkts(void *rx_queue, struct rte_mbuf 
**rx_pkts,
if (CMP_TYPE(rxcmp) == CMPL_BASE_TYPE_HWRM_DONE) {
PMD_DRV_LOG(ERR, "Rx flush done\n");
   

[PATCH 0/3] replace use of EAL logtype

2023-12-11 Thread Stephen Hemminger
There are some places in testpmd and the examples where the EAL
logtype is being used when it should not be. Let's reserve the EAL
logtype to only be used by DPDK internals.

Stephen Hemminger (3):
  testpmd: replace EAL logtype with fprintf
  examples/l2fwd-keepalive: don't use EAL logtype
  examples/vm_power_manager: do not use EAL logtype

 app/test-pmd/testpmd.c   | 33 +++-
 examples/l2fwd-keepalive/shm.c   | 21 +++-
 examples/vm_power_manager/main.c | 11 ---
 3 files changed, 22 insertions(+), 43 deletions(-)

-- 
2.42.0



[PATCH 1/3] testpmd: replace EAL logtype with fprintf

2023-12-11 Thread Stephen Hemminger
Testpmd is misusing EAL logtype for its own errors.
Since the code directly calls fprintf in other places,
change to use that everywhere. This has the added benefit
of not having testpmd output clutter up syslog().

Signed-off-by: Stephen Hemminger 
---
 app/test-pmd/testpmd.c | 33 +++--
 1 file changed, 11 insertions(+), 22 deletions(-)

diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 9e4e99e53b9a..aa350d61e451 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -3413,8 +3413,7 @@ stop_port(portid_t pid)
 
ret = eth_dev_stop_mp(pi);
if (ret != 0) {
-   RTE_LOG(ERR, EAL, "rte_eth_dev_stop failed for port 
%u\n",
-   pi);
+   fprintf(stderr, "rte_eth_dev_stop failed for port 
%u\n", pi);
/* Allow to retry stopping the port. */
port->port_status = RTE_PORT_STARTED;
continue;
@@ -3812,23 +3811,20 @@ pmd_test_exit(void)
if (hot_plug) {
ret = rte_dev_event_monitor_stop();
if (ret) {
-   RTE_LOG(ERR, EAL,
-   "fail to stop device event monitor.");
+   fprintf(stderr, "fail to stop device event monitor.");
return;
}
 
ret = rte_dev_event_callback_unregister(NULL,
dev_event_callback, NULL);
if (ret < 0) {
-   RTE_LOG(ERR, EAL,
-   "fail to unregister device event callback.\n");
+   fprintf(stderr, "fail to unregister device event 
callback.\n");
return;
}
 
ret = rte_dev_hotplug_handle_disable();
if (ret) {
-   RTE_LOG(ERR, EAL,
-   "fail to disable hotplug handling.\n");
+   fprintf(stderr, "fail to disable hotplug handling.\n");
return;
}
}
@@ -4062,12 +4058,10 @@ dev_event_callback(const char *device_name, enum 
rte_dev_event_type type,
 
switch (type) {
case RTE_DEV_EVENT_REMOVE:
-   RTE_LOG(DEBUG, EAL, "The device: %s has been removed!\n",
-   device_name);
+   fprintf(stderr, "The device: %s has been removed!\n", 
device_name);
ret = rte_eth_dev_get_port_by_name(device_name, &port_id);
if (ret) {
-   RTE_LOG(ERR, EAL, "can not get port by device %s!\n",
-   device_name);
+   fprintf(stderr, "Can not get port by device %s!\n", 
device_name);
return;
}
/*
@@ -4081,12 +4075,10 @@ dev_event_callback(const char *device_name, enum 
rte_dev_event_type type,
 */
if (rte_eal_alarm_set(10,
rmv_port_callback, (void *)(intptr_t)port_id))
-   RTE_LOG(ERR, EAL,
-   "Could not set up deferred device removal\n");
+   fprintf(stderr, "Could not set up deferred device 
removal\n");
break;
case RTE_DEV_EVENT_ADD:
-   RTE_LOG(ERR, EAL, "The device: %s has been added!\n",
-   device_name);
+   fprintf(stderr, "The device: %s has been added!\n", 
device_name);
/* TODO: After finish kernel driver binding,
 * begin to attach port.
 */
@@ -4632,23 +4624,20 @@ main(int argc, char** argv)
if (hot_plug) {
ret = rte_dev_hotplug_handle_enable();
if (ret) {
-   RTE_LOG(ERR, EAL,
-   "fail to enable hotplug handling.");
+   fprintf(stderr, "fail to enable hotplug handling.");
return -1;
}
 
ret = rte_dev_event_monitor_start();
if (ret) {
-   RTE_LOG(ERR, EAL,
-   "fail to start device event monitoring.");
+   fprintf(stderr, "fail to start device event 
monitoring.");
return -1;
}
 
ret = rte_dev_event_callback_register(NULL,
dev_event_callback, NULL);
if (ret) {
-   RTE_LOG(ERR, EAL,
-   "fail  to register device event callback\n");
+   fprintf(stderr, "fail  to register device event 
callback\n");
return -1;
}
}
-- 
2.42.0



[PATCH 2/3] examples/l2fwd-keepalive: don't use EAL logtype

2023-12-11 Thread Stephen Hemminger
EAL logtype should be reserved for EAL library.
This example is already using printf() so just print
errors to stderr.

Signed-off-by: Stephen Hemminger 
---
 examples/l2fwd-keepalive/shm.c | 21 +++--
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/examples/l2fwd-keepalive/shm.c b/examples/l2fwd-keepalive/shm.c
index 7c7a9ea8ea3c..cbeeb511ef07 100644
--- a/examples/l2fwd-keepalive/shm.c
+++ b/examples/l2fwd-keepalive/shm.c
@@ -5,7 +5,6 @@
 #include 
 
 #include 
-#include 
 #include 
 
 #include "shm.h"
@@ -28,28 +27,23 @@ struct rte_keepalive_shm *rte_keepalive_shm_create(void)
fd = shm_open(RTE_KEEPALIVE_SHM_NAME,
O_CREAT | O_TRUNC | O_RDWR, 0666);
if (fd < 0)
-   RTE_LOG(INFO, EAL,
-   "Failed to open %s as SHM (%s)\n",
-   RTE_KEEPALIVE_SHM_NAME,
-   strerror(errno));
+   fprintf(stderr, "Failed to open %s as SHM (%s)\n",
+   RTE_KEEPALIVE_SHM_NAME, strerror(errno));
else if (ftruncate(fd, sizeof(struct rte_keepalive_shm)) != 0)
-   RTE_LOG(INFO, EAL,
-   "Failed to resize SHM (%s)\n", strerror(errno));
+   fprintf(stderr, "Failed to resize SHM (%s)\n", strerror(errno));
else {
ka_shm = (struct rte_keepalive_shm *) mmap(
0, sizeof(struct rte_keepalive_shm),
PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
close(fd);
if (ka_shm == MAP_FAILED)
-   RTE_LOG(INFO, EAL,
-   "Failed to mmap SHM (%s)\n", strerror(errno));
+   fprintf(stderr, "Failed to mmap SHM (%s)\n", 
strerror(errno));
else {
memset(ka_shm, 0, sizeof(struct rte_keepalive_shm));
 
/* Initialize the semaphores for IPC/SHM use */
if (sem_init(&ka_shm->core_died, 1, 0) != 0) {
-   RTE_LOG(INFO, EAL,
-   "Failed to setup SHM semaphore (%s)\n",
+   fprintf(stderr, "Failed to setup SHM semaphore 
(%s)\n",
strerror(errno));
munmap(ka_shm,
sizeof(struct rte_keepalive_shm));
@@ -87,7 +81,7 @@ void rte_keepalive_relayed_state(struct rte_keepalive_shm 
*shm,
 * ka_agent is not active.
 */
if (sem_getvalue(&shm->core_died, &count) == -1) {
-   RTE_LOG(INFO, EAL, "Semaphore check failed(%s)\n",
+   fprintf(stderr, "Semaphore check failed(%s)\n",
strerror(errno));
return;
}
@@ -95,8 +89,7 @@ void rte_keepalive_relayed_state(struct rte_keepalive_shm 
*shm,
return;
 
if (sem_post(&shm->core_died) != 0)
-   RTE_LOG(INFO, EAL,
-   "Failed to increment semaphore (%s)\n",
+   fprintf(stderr, "Failed to increment semaphore (%s)\n",
strerror(errno));
}
 }
-- 
2.42.0



[PATCH 3/3] examples/vm_power_manager: do not use EAL logtype

2023-12-11 Thread Stephen Hemminger
Be consistent for all the error printouts and use fprintf().
The EAL logtype is reserved for internal use by EAL.

Signed-off-by: Stephen Hemminger 
---
 examples/vm_power_manager/main.c | 11 ---
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/examples/vm_power_manager/main.c b/examples/vm_power_manager/main.c
index b159291d77ce..c14138202004 100644
--- a/examples/vm_power_manager/main.c
+++ b/examples/vm_power_manager/main.c
@@ -17,7 +17,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -425,8 +424,7 @@ main(int argc, char **argv)
 
lcore_id = rte_get_next_lcore(-1, 1, 0);
if (lcore_id == RTE_MAX_LCORE) {
-   RTE_LOG(ERR, EAL, "A minimum of three cores are required to run 
"
-   "application\n");
+   fprintf(stderr, "A minimum of three cores are required to run 
application\n");
return 0;
}
printf("Running channel monitor on lcore id %d\n", lcore_id);
@@ -434,16 +432,15 @@ main(int argc, char **argv)
 
lcore_id = rte_get_next_lcore(lcore_id, 1, 0);
if (lcore_id == RTE_MAX_LCORE) {
-   RTE_LOG(ERR, EAL, "A minimum of three cores are required to run 
"
-   "application\n");
+   fprintf(stderr, "A minimum of three cores are required to run 
application\n");
return 0;
}
if (power_manager_init() < 0) {
-   printf("Unable to initialize power manager\n");
+   fprintf(stderr, "Unable to initialize power manager\n");
return -1;
}
if (channel_manager_init(CHANNEL_MGR_DEFAULT_HV_PATH) < 0) {
-   printf("Unable to initialize channel manager\n");
+   fprintf(stderr, "Unable to initialize channel manager\n");
return -1;
}
 
-- 
2.42.0



Re: [PATCH 1/2] eal: fix constraints on stdatomic API

2023-12-11 Thread Tyler Retzlaff
On Mon, Dec 11, 2023 at 03:39:03PM +0800, Jie Hai wrote:
> The first parameter of rte_atomic_exchange_explicit() must be a
> pointer to _Atomic type. If run command "meson setup --werror
> -Denable_stdatomic=true build && ninja -C build", error will occur.
> This patch fixes it.
> 
> Fixes: 1ec6a845b5cb ("eal: use stdatomic API in public headers")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Jie Hai 
> ---
>  app/test/test_atomic.c   |  6 +++---
>  lib/eal/include/generic/rte_atomic.h | 12 ++--
>  2 files changed, 9 insertions(+), 9 deletions(-)
> 
> diff --git a/app/test/test_atomic.c b/app/test/test_atomic.c
> index db07159e81ab..c3cb3ae0ea57 100644
> --- a/app/test/test_atomic.c
> +++ b/app/test/test_atomic.c
> @@ -347,9 +347,9 @@ typedef union {
>  const uint8_t CRC8_POLY = 0x91;
>  uint8_t crc8_table[256];
>  
> -volatile uint16_t token16;
> -volatile uint32_t token32;
> -volatile uint64_t token64;
> +volatile RTE_ATOMIC(uint16_t) token16;
> +volatile RTE_ATOMIC(uint32_t) token32;
> +volatile RTE_ATOMIC(uint64_t) token64;

subject to my comment below, volatile qualification can be removed.

>  
>  static void
>  build_crc8_table(void)
> diff --git a/lib/eal/include/generic/rte_atomic.h 
> b/lib/eal/include/generic/rte_atomic.h
> index 0e639dad76a4..38c3b41f9c68 100644
> --- a/lib/eal/include/generic/rte_atomic.h
> +++ b/lib/eal/include/generic/rte_atomic.h
> @@ -207,11 +207,11 @@ rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t 
> exp, uint16_t src)
>   *   The original value at that location
>   */
>  static inline uint16_t
> -rte_atomic16_exchange(volatile uint16_t *dst, uint16_t val);
> +rte_atomic16_exchange(volatile RTE_ATOMIC(uint16_t) *dst, uint16_t val);

the existing rte_atomicNN (the old non-standard ones) are deprecated and will
be eventually removed so there isn't a lot of value in churning their
signatures to wrap the rte_stdatomic macros.

the right thing to do here to just change the calling code to use the generic
rte_stdatomic macros directly so we can eventually remove
rte_atomicNN_xxx.

ty



Re: [PATCH 0/3] replace use of EAL logtype

2023-12-11 Thread Tyler Retzlaff
On Mon, Dec 11, 2023 at 09:23:22AM -0800, Stephen Hemminger wrote:
> There are some places EAL logtype is being used in testpmd
> and examples where it should not be. Let's reserve EAL
> logtype to only be used by DPDK internals.
> 
> Stephen Hemminger (3):
>   testpmd: replace EAL logtype with fprintf
>   examples/l2fwd-keepalive: don't use EAL logtype
>   examples/vm_power_manager: do not use EAL logtype
> 
>  app/test-pmd/testpmd.c   | 33 +++-
>  examples/l2fwd-keepalive/shm.c   | 21 +++-
>  examples/vm_power_manager/main.c | 11 ---

are the log types under ALLOW_INTERNAL_API? if not should they be? and
if they are should examples be defining ALLOW_INTERNAL_API?

just curious.



[PATCH] cryptodev: convert to dynamic logtype

2023-12-11 Thread Stephen Hemminger
The cryptodev logs are all referenced via rte_cryptodev.h
so make it dynamic there.

Signed-off-by: Stephen Hemminger 
---
 lib/cryptodev/rte_cryptodev.c | 2 ++
 lib/cryptodev/rte_cryptodev.h | 2 ++
 lib/cryptodev/version.map | 1 +
 lib/log/log.c | 1 -
 lib/log/rte_log.h | 2 +-
 5 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/lib/cryptodev/rte_cryptodev.c b/lib/cryptodev/rte_cryptodev.c
index b25882773421..25e3ec12d1df 100644
--- a/lib/cryptodev/rte_cryptodev.c
+++ b/lib/cryptodev/rte_cryptodev.c
@@ -49,6 +49,8 @@ struct rte_crypto_fp_ops 
rte_crypto_fp_ops[RTE_CRYPTO_MAX_DEVS];
 /* spinlock for crypto device callbacks */
 static rte_spinlock_t rte_cryptodev_cb_lock = RTE_SPINLOCK_INITIALIZER;
 
+RTE_LOG_REGISTER_DEFAULT(rte_cryptodev_logtype, INFO);
+
 /**
  * The user application callback description.
  *
diff --git a/lib/cryptodev/rte_cryptodev.h b/lib/cryptodev/rte_cryptodev.h
index aaeaf294e6bb..82b711395a5b 100644
--- a/lib/cryptodev/rte_cryptodev.h
+++ b/lib/cryptodev/rte_cryptodev.h
@@ -29,6 +29,8 @@ extern "C" {
 extern const char **rte_cyptodev_names;
 
 /* Logging Macros */
+extern int rte_cryptodev_logtype;
+#define RTE_LOGTYPE_CRYPTODEV  rte_cryptodev_logtype
 
 #define CDEV_LOG_ERR(...) \
RTE_LOG(ERR, CRYPTODEV, \
diff --git a/lib/cryptodev/version.map b/lib/cryptodev/version.map
index c39199be54f5..54360a5da538 100644
--- a/lib/cryptodev/version.map
+++ b/lib/cryptodev/version.map
@@ -44,6 +44,7 @@ DPDK_24 {
rte_cryptodev_get_sec_ctx;
rte_cryptodev_info_get;
rte_cryptodev_is_valid_dev;
+   rte_cryptodev_logtype;
rte_cryptodev_name_get;
rte_cryptodev_queue_pair_count;
rte_cryptodev_queue_pair_setup;
diff --git a/lib/log/log.c b/lib/log/log.c
index e3cd4cff0fbc..ab06132a98a1 100644
--- a/lib/log/log.c
+++ b/lib/log/log.c
@@ -356,7 +356,6 @@ static const struct logtype logtype_strings[] = {
{RTE_LOGTYPE_PORT,   "lib.port"},
{RTE_LOGTYPE_TABLE,  "lib.table"},
{RTE_LOGTYPE_PIPELINE,   "lib.pipeline"},
-   {RTE_LOGTYPE_CRYPTODEV,  "lib.cryptodev"},
{RTE_LOGTYPE_EVENTDEV,   "lib.eventdev"},
{RTE_LOGTYPE_USER1,  "user1"},
{RTE_LOGTYPE_USER2,  "user2"},
diff --git a/lib/log/rte_log.h b/lib/log/rte_log.h
index 27fb6129a7aa..2d5eb23eeedf 100644
--- a/lib/log/rte_log.h
+++ b/lib/log/rte_log.h
@@ -43,7 +43,7 @@ extern "C" {
 #define RTE_LOGTYPE_TABLE 14 /**< Log related to table. */
 #define RTE_LOGTYPE_PIPELINE  15 /**< Log related to pipeline. */
 /* was RTE_LOGTYPE_MBUF */
-#define RTE_LOGTYPE_CRYPTODEV 17 /**< Log related to cryptodev. */
+/* was RTE_LOGTYPE_CRYPTODEV */
 /* was RTE_LOGTYPE_EFD */
 #define RTE_LOGTYPE_EVENTDEV  19 /**< Log related to eventdev. */
 /* was RTE_LOGTYPE_GSO */
-- 
2.42.0



[PATCH] eventdev: replace RTE_LOGTYPE_EVENTDEV with a dynamic type

2023-12-11 Thread Stephen Hemminger
With a little setup in eventdev_pmd.h the eventdev drivers
and API can be converted to dynamic log type.

Signed-off-by: Stephen Hemminger 
---
 lib/eventdev/eventdev_pmd.h | 3 +++
 lib/eventdev/rte_eventdev.c | 2 ++
 lib/eventdev/version.map| 1 +
 lib/log/log.c   | 1 -
 lib/log/rte_log.h   | 2 +-
 5 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/lib/eventdev/eventdev_pmd.h b/lib/eventdev/eventdev_pmd.h
index 30bd90085c44..52dc1a1d79fb 100644
--- a/lib/eventdev/eventdev_pmd.h
+++ b/lib/eventdev/eventdev_pmd.h
@@ -31,6 +31,9 @@ extern "C" {
 #include "event_timer_adapter_pmd.h"
 #include "rte_eventdev.h"
 
+extern int rte_event_logtype;
+#define RTE_LOGTYPE_EVENTDEV rte_event_logtype
+
 /* Logging Macros */
 #define RTE_EDEV_LOG_ERR(...) \
RTE_LOG(ERR, EVENTDEV, \
diff --git a/lib/eventdev/rte_eventdev.c b/lib/eventdev/rte_eventdev.c
index 0ca32d672175..10ea7f626e66 100644
--- a/lib/eventdev/rte_eventdev.c
+++ b/lib/eventdev/rte_eventdev.c
@@ -28,6 +28,8 @@
 #include "eventdev_pmd.h"
 #include "eventdev_trace.h"
 
+RTE_LOG_REGISTER_DEFAULT(rte_event_logtype, INFO);
+
 static struct rte_eventdev rte_event_devices[RTE_EVENT_MAX_DEVS];
 
 struct rte_eventdev *rte_eventdevs = rte_event_devices;
diff --git a/lib/eventdev/version.map b/lib/eventdev/version.map
index 3d5c6c2b835c..964cd6444169 100644
--- a/lib/eventdev/version.map
+++ b/lib/eventdev/version.map
@@ -75,6 +75,7 @@ DPDK_24 {
rte_event_eth_tx_adapter_stats_reset;
rte_event_eth_tx_adapter_stop;
rte_event_fp_ops;
+   rte_event_logtype;
rte_event_port_attr_get;
rte_event_port_default_conf_get;
rte_event_port_link;
diff --git a/lib/log/log.c b/lib/log/log.c
index e3cd4cff0fbc..6314b6ba8e30 100644
--- a/lib/log/log.c
+++ b/lib/log/log.c
@@ -357,7 +357,6 @@ static const struct logtype logtype_strings[] = {
{RTE_LOGTYPE_TABLE,  "lib.table"},
{RTE_LOGTYPE_PIPELINE,   "lib.pipeline"},
{RTE_LOGTYPE_CRYPTODEV,  "lib.cryptodev"},
-   {RTE_LOGTYPE_EVENTDEV,   "lib.eventdev"},
{RTE_LOGTYPE_USER1,  "user1"},
{RTE_LOGTYPE_USER2,  "user2"},
{RTE_LOGTYPE_USER3,  "user3"},
diff --git a/lib/log/rte_log.h b/lib/log/rte_log.h
index 27fb6129a7aa..4bdb4e518a71 100644
--- a/lib/log/rte_log.h
+++ b/lib/log/rte_log.h
@@ -45,7 +45,7 @@ extern "C" {
 /* was RTE_LOGTYPE_MBUF */
 #define RTE_LOGTYPE_CRYPTODEV 17 /**< Log related to cryptodev. */
 /* was RTE_LOGTYPE_EFD */
-#define RTE_LOGTYPE_EVENTDEV  19 /**< Log related to eventdev. */
+/* was RTE_LOGTYPE_EVENTDEV */
 /* was RTE_LOGTYPE_GSO */
 
 /* these log types can be used in an application */
-- 
2.42.0



Sending multiple patches for a feature work.

2023-12-11 Thread Aditya Ambadipudi
Hello all.

My name is Aditya Ambadipudi. I work at Arm. I and my boss (Honnappa 
Nagarahalli) have been working on the Deque library that he talked about 
earlier in another email several weeks/months ago.

Essentially, he wrote out all the function signatures and stuff. He also 
implemented a few of these functions. I was tasked with implementing the 
remaining features.

We were wondering what the best way would be to mail all of these changes. 
Locally we have two commits. The one that my boss created and the one that I 
created.

I was wondering if it was possible to mail this whole thing as two separate 
patches. If we squash both of our commits together it can only have a single 
author name.

Both of us have individually written a lot of code for this project. I have 
written around 2000 lines so far and my boss has written close to 1000.  What 
is the best way to mail these patches such that both of us are recognized for 
the work we have done.

Thank you,
Aditya Ambadipudi.


[PATCH] net/iavf: support rte flow with mask for FDIR

2023-12-11 Thread Zhichao Zeng
This patch supports rte flow with mask for FDIR, including
eth/ipv4/ipv6/tcp/udp flow items.

Signed-off-by: Zhichao Zeng 
---
 drivers/net/iavf/iavf_fdir.c | 419 ---
 1 file changed, 243 insertions(+), 176 deletions(-)

diff --git a/drivers/net/iavf/iavf_fdir.c b/drivers/net/iavf/iavf_fdir.c
index 811a10287b..df5359892c 100644
--- a/drivers/net/iavf/iavf_fdir.c
+++ b/drivers/net/iavf/iavf_fdir.c
@@ -742,6 +742,7 @@ iavf_fdir_parse_pattern(__rte_unused struct iavf_adapter 
*ad,
const struct rte_flow_item_ppp *ppp_spec, *ppp_mask;
const struct rte_flow_item *item = pattern;
struct virtchnl_proto_hdr *hdr, *hdr1 = NULL;
+   struct virtchnl_proto_hdr_w_msk *hdr_w_msk, *hdr1_w_msk = NULL;
struct rte_ecpri_common_hdr ecpri_common;
uint64_t input_set = IAVF_INSET_NONE;
enum rte_flow_item_type item_type;
@@ -749,6 +750,7 @@ iavf_fdir_parse_pattern(__rte_unused struct iavf_adapter 
*ad,
uint8_t tun_inner = 0;
uint16_t ether_type, flags_version;
uint8_t item_num = 0;
+   int with_mask = 0;
int layer = 0;
 
uint8_t  ipv6_addr_mask[16] = {
@@ -838,8 +840,10 @@ iavf_fdir_parse_pattern(__rte_unused struct iavf_adapter 
*ad,
next_type = (item + 1)->type;
 
hdr1 = &hdrs->proto_hdr[layer];
+   hdr1_w_msk = &hdrs->proto_hdr_w_msk[layer];
 
VIRTCHNL_SET_PROTO_HDR_TYPE(hdr1, ETH);
+   VIRTCHNL_SET_PROTO_HDR_TYPE(hdr1_w_msk, ETH);
 
if (next_type == RTE_FLOW_ITEM_TYPE_END &&
(!eth_spec || !eth_mask)) {
@@ -850,43 +854,60 @@ iavf_fdir_parse_pattern(__rte_unused struct iavf_adapter 
*ad,
}
 
if (eth_spec && eth_mask) {
-   if 
(!rte_is_zero_ether_addr(&eth_mask->hdr.dst_addr)) {
-   input_set |= IAVF_INSET_DMAC;
-   VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr1,
-   ETH,
-   DST);
-   } else if 
(!rte_is_zero_ether_addr(&eth_mask->hdr.src_addr)) {
-   input_set |= IAVF_INSET_SMAC;
-   VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr1,
-   ETH,
-   SRC);
-   }
-
-   if (eth_mask->hdr.ether_type) {
-   if (eth_mask->hdr.ether_type != 
RTE_BE16(0x)) {
-   rte_flow_error_set(error, 
EINVAL,
-   
RTE_FLOW_ERROR_TYPE_ITEM,
-   item, "Invalid type 
mask.");
-   return -rte_errno;
+   if 
((!rte_is_zero_ether_addr(&eth_mask->hdr.dst_addr) &&
+   
!rte_is_broadcast_ether_addr(&eth_mask->hdr.dst_addr)) ||
+   
(!rte_is_zero_ether_addr(&eth_mask->hdr.src_addr) &&
+   
!rte_is_broadcast_ether_addr(&eth_mask->hdr.src_addr))) {
+   if 
(!rte_is_zero_ether_addr(&eth_mask->hdr.dst_addr))
+   input_set |= IAVF_INSET_DMAC;
+   if 
(!rte_is_zero_ether_addr(&eth_mask->hdr.src_addr))
+   input_set |= IAVF_INSET_SMAC;
+   if (eth_mask->hdr.ether_type)
+   input_set |= 
IAVF_INSET_ETHERTYPE;
+   rte_memcpy(hdr1_w_msk->buffer_spec, 
eth_spec,
+   sizeof(struct rte_ether_hdr));
+   rte_memcpy(hdr1_w_msk->buffer_mask, 
eth_mask,
+   sizeof(struct rte_ether_hdr));
+   with_mask = 1;
+   } else {
+   if 
(!rte_is_zero_ether_addr(&eth_mask->hdr.dst_addr)) {
+   input_set |= IAVF_INSET_DMAC;
+   
VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr1,
+   
ETH,
+   
DST);
+   } else if 
(!rte_is_zero_ether_addr(&eth_mask->hdr.src_addr)) {
+  

[PATCH v3] lib/net: fix tcp/udp cksum with padding data

2023-12-11 Thread Kaiwen Deng
IEEE 802 packets may have a minimum size limit. The data fields
should be padded when necessary. In some cases, the padding data
is not zero.

'rte_ipv4_udptcp_cksum_mbuf()' uses "mbuf->pkt_len - l4_off" as the
payload length, which includes the padding. If the padding is not
zero, this ends up producing a wrong checksum.

This patch will use IP header to get the payload size to calculate
tcp/udp checksum.

Fixes: d178f693bbfe ("net: add UDP/TCP checksum in mbuf segments")
Cc: sta...@dpdk.org

Signed-off-by: Kaiwen Deng 
---
 lib/net/rte_ip.h | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/lib/net/rte_ip.h b/lib/net/rte_ip.h
index 6fa98a5a0f..c503a2b57f 100644
--- a/lib/net/rte_ip.h
+++ b/lib/net/rte_ip.h
@@ -423,7 +423,10 @@ __rte_ipv4_udptcp_cksum_mbuf(const struct rte_mbuf *m,
if (l4_off > m->pkt_len)
return 0;
 
-   if (rte_raw_cksum_mbuf(m, l4_off, m->pkt_len - l4_off, &raw_cksum))
+   uint16_t len = rte_be_to_cpu_16(ipv4_hdr->total_length) -
+   (uint16_t)rte_ipv4_hdr_len(ipv4_hdr);
+
+   if (rte_raw_cksum_mbuf(m, l4_off, len, &raw_cksum))
return 0;
 
cksum = raw_cksum + rte_ipv4_phdr_cksum(ipv4_hdr, 0);
@@ -666,7 +669,9 @@ __rte_ipv6_udptcp_cksum_mbuf(const struct rte_mbuf *m,
if (l4_off > m->pkt_len)
return 0;
 
-   if (rte_raw_cksum_mbuf(m, l4_off, m->pkt_len - l4_off, &raw_cksum))
+   uint16_t len = rte_be_to_cpu_16(ipv6_hdr->payload_len);
+
+   if (rte_raw_cksum_mbuf(m, l4_off, len, &raw_cksum))
return 0;
 
cksum = raw_cksum + rte_ipv6_phdr_cksum(ipv6_hdr, 0);
-- 
2.34.1



[PATCH 1/5] app/test-pm: add multiprocess test

2023-12-11 Thread Artemy Kovalyov
This commit adds a test scenario that initiates multiple processes
concurrently. These processes attach to the same shared heap, with an
automatic detection mechanism to identify the primary process.

Signed-off-by: Artemy Kovalyov 
---
 app/meson.build |  1 +
 app/test-mp/main.c  | 49 +
 app/test-mp/meson.build |  8 
 app/test-mp/run.sh  | 39 +++
 4 files changed, 97 insertions(+)
 create mode 100644 app/test-mp/main.c
 create mode 100644 app/test-mp/meson.build
 create mode 100755 app/test-mp/run.sh

diff --git a/app/meson.build b/app/meson.build
index 8aaed59..1b80091 100644
--- a/app/meson.build
+++ b/app/meson.build
@@ -30,6 +30,7 @@ apps = [
 'test-flow-perf',
 'test-gpudev',
 'test-mldev',
+'test-mp',
 'test-pipeline',
 'test-pmd',
 'test-regex',
diff --git a/app/test-mp/main.c b/app/test-mp/main.c
new file mode 100644
index 000..0a0fbbf
--- /dev/null
+++ b/app/test-mp/main.c
@@ -0,0 +1,49 @@
+#include 
+#include 
+
+#include 
+#include 
+#include 
+
+rte_atomic32_t g_count;
+
+static int
+done(const struct rte_mp_msg *msg __rte_unused, const void *arg __rte_unused)
+{
+   rte_atomic32_dec(&g_count);
+   return 0;
+}
+
+int
+main(int argc, char **argv)
+{
+   void *p;
+   int ret;
+
+   ret = rte_eal_init(argc, argv);
+   assert(ret >= 0);
+
+   rte_atomic32_set(&g_count, atoi(argv[++ret]));
+
+   if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+   ret = rte_mp_action_register("done", done);
+   assert(ret == 0);
+   }
+
+   p = rte_malloc(NULL, 0x100, 0x1000);
+   assert(p);
+
+   if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+   uint64_t timeout = rte_rdtsc() + 5 * rte_get_tsc_hz();
+
+   while (rte_atomic32_read(&g_count) > 0)
+   assert(rte_rdtsc() < timeout);
+   } else {
+   struct rte_mp_msg msg = { .name = "done" };
+
+   rte_mp_sendmsg(&msg);
+   }
+
+   rte_eal_cleanup();
+   return 0;
+}
diff --git a/app/test-mp/meson.build b/app/test-mp/meson.build
new file mode 100644
index 000..feb9e20
--- /dev/null
+++ b/app/test-mp/meson.build
@@ -0,0 +1,8 @@
+if is_windows
+build = false
+reason = 'not supported on Windows'
+subdir_done()
+endif
+
+sources = files('main.c')
+deps = ['eal'] # , 'mempool', 'net', 'mbuf', 'ethdev', 'cmdline']
diff --git a/app/test-mp/run.sh b/app/test-mp/run.sh
new file mode 100755
index 000..8de07e2
--- /dev/null
+++ b/app/test-mp/run.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+logdir=/tmp/dpdk_test_mp
+repeat=1
+lastcore=$(($(nproc) - 1))
+log=1
+
+while getopts p:r:lL:d op; do case $op in
+p) lastcore=$OPTARG ;;
+r) repeat=$OPTARG ;;
+L) logdir=$OPTARG ;;
+l) log=0 ;;
+d) debug=1 ;;
+esac done
+shift $((OPTIND-1))
+
+test=$1
+logpath=$logdir/$(date +%y%m%d-%H%M%S)
+
+rm -f core.*
+pkill dpdk-test-mp
+
+for j in $(seq $repeat) ; do
+[ $log ] && mkdir -p $logpath/$j
+for i in $(seq 0 $lastcore) ; do
+   args="-l $i --file-prefix=dpdk1 --proc-type=auto"
+   if [ $debug ] ; then
+   args="$args --log-level=lib.eal:8"
+   fi
+   if [ $log ] ; then
+   $test $args $lastcore >$logpath/$j/$i.log 2>&1 &
+   else
+   $test $args $lastcore &
+   fi
+done
+wait || break
+[ $(ls core.* 2>/dev/null | wc -l) -gt 0 ] && break
+echo iteration $j passed
+done
-- 
1.8.3.1



[PATCH 3/5] ipc: fix mp channel closure to prevent message loss

2023-12-11 Thread Artemy Kovalyov
This commit addresses an issue related to the cleanup of the
multiprocess channel. Previously, when closing the channel, there was a
risk of losing trailing messages. This issue was particularly noticeable
when a broadcast message from the primary to secondary processes was sent
while a secondary process was closing its mp channel. In this fix, we
delete the mp socket file before stopping the mp receive thread.

Fixes: e7885281ded1 ("ipc: stop mp control thread on cleanup")
Cc: sta...@dpdk.org

Signed-off-by: Artemy Kovalyov 
---
 lib/eal/common/eal_common_proc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/eal/common/eal_common_proc.c b/lib/eal/common/eal_common_proc.c
index 728815c..d34fdda 100644
--- a/lib/eal/common/eal_common_proc.c
+++ b/lib/eal/common/eal_common_proc.c
@@ -593,7 +593,7 @@ enum async_action {
 }
 
 static void
-close_socket_fd(int fd)
+remove_socket_fd(int fd)
 {
char path[PATH_MAX];
 
@@ -672,9 +672,9 @@ enum async_action {
if (fd < 0)
return;
 
+   remove_socket_fd(fd);
pthread_cancel((pthread_t)mp_handle_tid.opaque_id);
rte_thread_join(mp_handle_tid, NULL);
-   close_socket_fd(fd);
 }
 
 /**
-- 
1.8.3.1



[PATCH 4/5] eal: fix first time primary autodetect

2023-12-11 Thread Artemy Kovalyov
If the configuration file is absent, the autodetection function should
generate and secure it. Otherwise, multiple simultaneous openings could
erroneously identify themselves as primary instances.

Fixes: af75078fece3 ("first public release")
Cc: sta...@dpdk.org

Signed-off-by: Artemy Kovalyov 
---
 lib/eal/linux/eal.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/eal/linux/eal.c b/lib/eal/linux/eal.c
index 57da058..9b59cec 100644
--- a/lib/eal/linux/eal.c
+++ b/lib/eal/linux/eal.c
@@ -360,7 +360,7 @@ enum rte_proc_type_t
 * keep that open and don't close it to prevent a race condition
 * between multiple opens.
 */
-   if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
+   if (((mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0600)) >= 
0) &&
(fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
ptype = RTE_PROC_SECONDARY;
}
-- 
1.8.3.1



[PATCH 5/5] eal: fix memzone fbarray cleanup

2023-12-11 Thread Artemy Kovalyov
The initialization of the Memzone file-backed array ensures its
uniqueness by employing an exclusive lock. This is crucial because only
one primary process can exist per specific shm_id, which is further
protected by the exclusive EAL runtime configuration lock.

However, during the process closure, the exclusive lock on both the
fbarray and the configuration is not explicitly released. The
responsibility of releasing these locks is left to the generic quit
procedure. This can lead to a potential race condition when the
configuration is released before the fbarray.

To address this, we propose explicitly closing the memzone fbarray. This
ensures proper order of operations during process closure and prevents
any potential race conditions arising from the mismatched lock release
timings.

Fixes: af75078fece3 ("first public release")
Cc: sta...@dpdk.org

Signed-off-by: Artemy Kovalyov 
---
 lib/eal/common/eal_common_memzone.c | 12 
 lib/eal/common/eal_private.h|  5 +
 lib/eal/linux/eal.c |  1 +
 3 files changed, 18 insertions(+)

diff --git a/lib/eal/common/eal_common_memzone.c 
b/lib/eal/common/eal_common_memzone.c
index 1f3e701..7db8029 100644
--- a/lib/eal/common/eal_common_memzone.c
+++ b/lib/eal/common/eal_common_memzone.c
@@ -447,6 +447,18 @@
return ret;
 }
 
+void
+rte_eal_memzone_cleanup(void)
+{
+   struct rte_mem_config *mcfg;
+
+   mcfg = rte_eal_get_configuration()->mem_config;
+
+   if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+   rte_fbarray_destroy(&mcfg->memzones);
+   }
+}
+
 /* Walk all reserved memory zones */
 void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *),
  void *arg)
diff --git a/lib/eal/common/eal_private.h b/lib/eal/common/eal_private.h
index 4d2e806..944c365 100644
--- a/lib/eal/common/eal_private.h
+++ b/lib/eal/common/eal_private.h
@@ -81,6 +81,11 @@ struct rte_config {
 int rte_eal_memzone_init(void);
 
 /**
+ * Cleanup the memzone subsystem (private to eal).
+ */
+void rte_eal_memzone_cleanup(void);
+
+/**
  * Fill configuration with number of physical and logical processors
  *
  * This function is private to EAL.
diff --git a/lib/eal/linux/eal.c b/lib/eal/linux/eal.c
index 9b59cec..dfcbe64 100644
--- a/lib/eal/linux/eal.c
+++ b/lib/eal/linux/eal.c
@@ -1375,6 +1375,7 @@ static void rte_eal_init_alert(const char *msg)
eal_trace_fini();
eal_mp_dev_hotplug_cleanup();
rte_eal_alarm_cleanup();
+   rte_eal_memzone_cleanup();
/* after this point, any DPDK pointers will become dangling */
rte_eal_memory_detach();
rte_eal_malloc_heap_cleanup();
-- 
1.8.3.1



[PATCH 0/5] addressing races in concurrent process startup

2023-12-11 Thread Artemy Kovalyov
In the process of initiating multiple processes concurrently, specifically with
automatic detection of the primary process, certain race conditions have been
identified. This patch series introduces a straightforward test that showcases
the issue and subsequently addresses the problems surfaced by the test. These
fixes aim to ensure the robust and secure utilization of DPDK within intricate
solutions that involve starting processes with job orchestrators such as Slurm
or Hadoop YARN.

Artemy Kovalyov (5):
  app/test-pm: add multiprocess test
  eal: fix multiprocess hotplug race
  ipc: fix mp channel closure to prevent message loss
  eal: fix first time primary autodetect
  eal: fix memzone fbarray cleanup

 app/meson.build |  1 +
 app/test-mp/main.c  | 49 +
 app/test-mp/meson.build |  8 ++
 app/test-mp/run.sh  | 39 +
 lib/eal/common/eal_common_memzone.c | 12 +
 lib/eal/common/eal_common_proc.c|  4 +--
 lib/eal/common/eal_private.h|  5 
 lib/eal/common/hotplug_mp.c |  3 +++
 lib/eal/linux/eal.c |  3 ++-
 9 files changed, 121 insertions(+), 3 deletions(-)
 create mode 100644 app/test-mp/main.c
 create mode 100644 app/test-mp/meson.build
 create mode 100755 app/test-mp/run.sh

-- 
1.8.3.1



[PATCH 2/5] eal: fix multiprocess hotplug race

2023-12-11 Thread Artemy Kovalyov
There exists a time gap between the creation of the multiprocess channel
and the registration of request action handlers. Within this window, a
secondary process that receives an eal_dev_mp_request broadcast
notification might respond with ENOTSUP. This, in turn, causes the
rte_dev_probe() operation to fail in another secondary process.
To avoid this, disregard ENOTSUP responses to attach notifications.

Fixes: 244d5130719c ("eal: enable hotplug on multi-process")
Cc: sta...@dpdk.org

Signed-off-by: Artemy Kovalyov 
---
 lib/eal/common/hotplug_mp.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/lib/eal/common/hotplug_mp.c b/lib/eal/common/hotplug_mp.c
index 6027819..e6a3f6b 100644
--- a/lib/eal/common/hotplug_mp.c
+++ b/lib/eal/common/hotplug_mp.c
@@ -428,6 +428,9 @@ int eal_dev_hotplug_request_to_secondary(struct eal_dev_mp_req *req)
if (req->t == EAL_DEV_REQ_TYPE_ATTACH &&
resp->result == -EEXIST)
continue;
+   if (req->t == EAL_DEV_REQ_TYPE_ATTACH &&
+   resp->result == -ENOTSUP)
+   continue;
if (req->t == EAL_DEV_REQ_TYPE_DETACH &&
resp->result == -ENOENT)
continue;
-- 
1.8.3.1



Re: [PATCH v2 3/3] event/cnxk: add option to update links via mbox

2023-12-11 Thread Jerin Jacob
On Tue, Dec 12, 2023 at 3:13 AM  wrote:
>
> From: Pavan Nikhilesh 
>
> Add option to update event queue to event port links via
> mailbox.
>
> Signed-off-by: Pavan Nikhilesh 

Updated the git commit as follows and series applied to
dpdk-next-net-eventdev/for-main. Thanks

commit 43b0ec0db7eabae40b60d0d29b7eee69531228f3 (HEAD -> for-main)
Author: Pavan Nikhilesh 
Date:   Mon Dec 11 18:53:20 2023 +0530

event/cnxk: add option to update links via mbox

Add option to update event queue to event port links via
mailbox.

Signed-off-by: Pavan Nikhilesh 

commit e638054cc0a0b7b320d1c4554fd78c96ca95f514
Author: Pavan Nikhilesh 
Date:   Mon Dec 11 18:53:19 2023 +0530

common/cnxk: update scheduler base code

Updated event scheduler base code to add checks to avoid
sending unnecessary mbox requests.

Signed-off-by: Pavan Nikhilesh 

commit d6601056e30156339ba972d1b9ca568867037484
Author: Pavan Nikhilesh 
Date:   Mon Dec 11 18:53:18 2023 +0530

common/cnxk: update timer base code

Updated event timer base code to add a mailbox to capture multiple
clock sources, and added support for the additional clock sources
SYNCE, BTS, EXT_MIO, EXT_GTI.

Signed-off-by: Pavan Nikhilesh 


Re: [PATCH v5 3/3] net/octeon_ep: use AVX2 instructions for Rx

2023-12-11 Thread Jerin Jacob
On Mon, Dec 11, 2023 at 8:33 PM  wrote:
>
> From: Pavan Nikhilesh 
>
> Optimize Rx routine to use AVX2 instructions when underlying
> architecture supports it.
>
> Signed-off-by: Pavan Nikhilesh 

Series applied to dpdk-next-net-mrvl/for-main. Thanks


RE: 20.11.10 patches review and test

2023-12-11 Thread Xu, HailinX
> -Original Message-
> From: luca.bocca...@gmail.com 
> Sent: Friday, December 1, 2023 7:51 PM
> To: sta...@dpdk.org
> Cc: dev@dpdk.org; Abhishek Marathe ;
> Ali Alnubani ; benjamin.wal...@intel.com; David
> Christensen ; Hemant Agrawal
> ; Stokes, Ian ; Jerin Jacob
> ; Mcnamara, John ; Ju-
> Hyoung Lee ; Kevin Traynor
> ; Luca Boccassi ; Pei Zhang
> ; qian.q...@intel.com; Raslan Darawsheh
> ; Thomas Monjalon ;
> Yanghang Liu ; yuan.p...@intel.com;
> zhaoyan.c...@intel.com
> Subject: 20.11.10 patches review and test
> 
> Hi all,
> 
> Here is a list of patches targeted for stable release 20.11.10.
> 
> The planned date for the final release is December 12th.
> 
> Please help with testing and validation of your use cases and report any
> issues/results with reply-all to this mail. For the final release the fixes
> and reported validations will be added to the release notes.
> 
> A release candidate tarball can be found at:
> 
> https://dpdk.org/browse/dpdk-stable/tag/?id=v20.11.10-rc1
> 
> These patches are located at branch 20.11 of dpdk-stable repo:
> https://dpdk.org/browse/dpdk-stable/
> 
> Thanks.
> 
> Luca Boccassi
Update on the test status for the Intel part: all validation tests for
dpdk 20.11.10-rc1 are now done. No new issues were found.

# Basic Intel(R) NIC testing
* Build & CFLAG compile: cover the build test combination with latest GCC/Clang 
version and the popular OS revision such as Ubuntu22.04, Ubuntu20.04, Fedora38, 
RHEL9.2, RHEL8.7, FreeBSD13.2, Centos7.9 etc.
- All test done. No new dpdk issue is found.
* PF(i40e, ixgbe): test scenarios including RTE_FLOW/TSO/Jumboframe/checksum 
offload/VLAN/VXLAN, etc. 
- All test done. No new dpdk issue is found.
* VF(i40e, ixgbe): test scenarios including VF-RTE_FLOW/TSO/Jumboframe/checksum 
offload/VLAN/VXLAN, etc.
- All test done. No new dpdk issue is found.
* PF/VF(ice): test scenarios including Switch features/Package Management/Flow 
Director/Advanced Tx/Advanced RSS/ACL/DCF/Flexible Descriptor, etc.
- All test done. No new dpdk issue is found.
* Intel NIC single core/NIC performance: test scenarios including PF/VF single 
core performance test, etc.
- All test done. No new dpdk issue is found.
* IPsec: test scenarios including ipsec/ipsec-gw/ipsec library basic test - 
QAT&SW/FIB library, etc.
- All test done. No new dpdk issue is found.

# Basic cryptodev and virtio testing
* Virtio: both function and performance test are covered. Such as 
PVP/Virtio_loopback/virtio-user loopback/virtio-net VM2VM perf testing/VMware 
ESXi 8.0, etc.
- All test done. No new dpdk issue is found.
* Cryptodev: 
  *Function test: test scenarios including Cryptodev API testing/CompressDev 
ISA-L/QAT/ZLIB PMD Testing/FIPS, etc.
- All test done. No new dpdk issue is found.
  *Performance test: test scenarios including Throughput Performance/Cryptodev 
Latency, etc.
- All test done. No new dpdk issue is found.

Regards,
Xu, Hailin


Re: [PATCH] cryptodev: convert to dynamic logtype

2023-12-11 Thread David Marchand
On Mon, Dec 11, 2023 at 9:18 PM Stephen Hemminger
 wrote:
>
> The cryptodev logs are all referenced via rte_crytpodev.h

crypto*

> so make it dynamic there.
>
> Signed-off-by: Stephen Hemminger 

I would add a comment in the header that this exported logtype
variable is for internal use.
Otherwise it lgtm.

Akhil, do you mind if I take this directly in main?


-- 
David Marchand



Re: [PATCH] eventdev: replace RTE_LOGTYPE_EVENTDEV with a dynamic type

2023-12-11 Thread David Marchand
On Mon, Dec 11, 2023 at 9:33 PM Stephen Hemminger
 wrote:
>
> With a little setup in eventdev_pmd.h the eventdev drivers
> and API can be converted to dynamic log type.
>
> Signed-off-by: Stephen Hemminger 

LGTM (with a comment on logtype).

Jerin, can I take this patch directly through main?


-- 
David Marchand