[PATCH] app/testpmd: fix parsing for connection tracking item

2024-06-02 Thread Maayan Kashani
In the command line translation, fields were missing for the
connection tracking item; as a result, this item was not parsed
and was missing from the items list received from testpmd.
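
For reference, once the item is parsed again, a rule matching the
conntrack state can be created from the testpmd prompt, for example
(the port, group, queue and flags values below are illustrative only):

    testpmd> flow create 0 ingress group 1 pattern eth / conntrack is 1 / end actions queue index 0 / end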

Fixes: 4d07cbefe3ba ("app/testpmd: add commands for conntrack")
Cc: sta...@dpdk.org
Signed-off-by: Maayan Kashani 
---
 app/test-pmd/cmdline_flow.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index 60ee9337cf..1f9d5ebd05 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -5797,9 +5797,12 @@ static const struct token token_list[] = {
[ITEM_CONNTRACK] = {
.name = "conntrack",
.help = "conntrack state",
+   .priv = PRIV_ITEM(CONNTRACK,
+ sizeof(struct rte_flow_item_conntrack)),
.next = NEXT(NEXT_ENTRY(ITEM_NEXT), NEXT_ENTRY(COMMON_UNSIGNED),
 item_param),
.args = ARGS(ARGS_ENTRY(struct rte_flow_item_conntrack, flags)),
+   .call = parse_vc,
},
[ITEM_PORT_REPRESENTOR] = {
.name = "port_representor",
-- 
2.25.1



[PATCH 1/3] net/mlx5/hws: split the root rule creation and destruction

2024-06-02 Thread Maayan Kashani
From: Yevgeny Kliteynik 

Split the root rule creation/destruction into two stages:
 - do the job (create/destroy rule)
 - generate completion

Completion generation is required for the usual HWS API.
The create/destroy functions that do not generate a completion
are exposed in the header file and will be used by the upcoming
backward-compatible API.
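
As a rough sketch of the intended split, a caller that does not go
through a queue could invoke the no-completion variant directly and
skip mlx5dr_rule_gen_comp(); the wrapper name below is illustrative,
only the *_no_comp() helper comes from this patch:

    static int
    example_root_rule_sync_create(struct mlx5dr_rule *rule,
                                  const struct rte_flow_item items[],
                                  uint8_t num_actions,
                                  struct mlx5dr_rule_action rule_actions[])
    {
        /* Create the root rule; no completion is posted to any queue,
         * the caller learns the result from the return value alone. */
        return mlx5dr_rule_create_root_no_comp(rule, items, num_actions,
                                               rule_actions);
    }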

Signed-off-by: Yevgeny Kliteynik 
---
 drivers/net/mlx5/hws/mlx5dr_rule.c | 49 +-
 drivers/net/mlx5/hws/mlx5dr_rule.h |  7 +
 2 files changed, 42 insertions(+), 14 deletions(-)

diff --git a/drivers/net/mlx5/hws/mlx5dr_rule.c b/drivers/net/mlx5/hws/mlx5dr_rule.c
index 171a0bff38..550f00a4c1 100644
--- a/drivers/net/mlx5/hws/mlx5dr_rule.c
+++ b/drivers/net/mlx5/hws/mlx5dr_rule.c
@@ -680,15 +680,12 @@ static int mlx5dr_rule_destroy_hws(struct mlx5dr_rule *rule,
return 0;
 }
 
-static int mlx5dr_rule_create_root(struct mlx5dr_rule *rule,
-  struct mlx5dr_rule_attr *rule_attr,
-  const struct rte_flow_item items[],
-  uint8_t at_idx,
-  struct mlx5dr_rule_action rule_actions[])
+int mlx5dr_rule_create_root_no_comp(struct mlx5dr_rule *rule,
+   const struct rte_flow_item items[],
+   uint8_t num_actions,
+   struct mlx5dr_rule_action rule_actions[])
 {
struct mlx5dv_flow_matcher *dv_matcher = rule->matcher->dv_matcher;
-   uint8_t num_actions = rule->matcher->at[at_idx].num_actions;
-   struct mlx5dr_context *ctx = rule->matcher->tbl->ctx;
struct mlx5dv_flow_match_parameters *value;
struct mlx5_flow_attr flow_attr = {0};
struct mlx5dv_flow_action_attr *attr;
@@ -750,9 +747,6 @@ static int mlx5dr_rule_create_root(struct mlx5dr_rule *rule,
num_actions,
attr);
 
-   mlx5dr_rule_gen_comp(&ctx->send_queue[rule_attr->queue_id], rule, !rule->flow,
-                        rule_attr->user_data, MLX5DR_RULE_STATUS_CREATED);
-
simple_free(value);
simple_free(attr);
 
@@ -766,14 +760,41 @@ static int mlx5dr_rule_create_root(struct mlx5dr_rule *rule,
return rte_errno;
 }
 
+static int mlx5dr_rule_create_root(struct mlx5dr_rule *rule,
+  struct mlx5dr_rule_attr *rule_attr,
+  const struct rte_flow_item items[],
+  uint8_t num_actions,
+  struct mlx5dr_rule_action rule_actions[])
+{
+   struct mlx5dr_context *ctx = rule->matcher->tbl->ctx;
+   int ret;
+
+   ret = mlx5dr_rule_create_root_no_comp(rule, items,
+ num_actions, rule_actions);
+   if (ret)
+   return rte_errno;
+
+   mlx5dr_rule_gen_comp(&ctx->send_queue[rule_attr->queue_id], rule, !rule->flow,
+                        rule_attr->user_data, MLX5DR_RULE_STATUS_CREATED);
+
+   return 0;
+}
+
+int mlx5dr_rule_destroy_root_no_comp(struct mlx5dr_rule *rule)
+{
+   if (rule->flow)
+   return ibv_destroy_flow(rule->flow);
+
+   return 0;
+}
+
 static int mlx5dr_rule_destroy_root(struct mlx5dr_rule *rule,
struct mlx5dr_rule_attr *attr)
 {
struct mlx5dr_context *ctx = rule->matcher->tbl->ctx;
-   int err = 0;
+   int err;
 
-   if (rule->flow)
-   err = ibv_destroy_flow(rule->flow);
+   err = mlx5dr_rule_destroy_root_no_comp(rule);
 
mlx5dr_rule_gen_comp(&ctx->send_queue[attr->queue_id], rule, err,
 attr->user_data, MLX5DR_RULE_STATUS_DELETED);
@@ -970,7 +991,7 @@ int mlx5dr_rule_create(struct mlx5dr_matcher *matcher,
ret = mlx5dr_rule_create_root(rule_handle,
  attr,
  items,
- at_idx,
+ matcher->at[at_idx].num_actions,
  rule_actions);
else
ret = mlx5dr_rule_create_hws(rule_handle,
diff --git a/drivers/net/mlx5/hws/mlx5dr_rule.h b/drivers/net/mlx5/hws/mlx5dr_rule.h
index dffaec1c0f..bc542eb543 100644
--- a/drivers/net/mlx5/hws/mlx5dr_rule.h
+++ b/drivers/net/mlx5/hws/mlx5dr_rule.h
@@ -74,4 +74,11 @@ int mlx5dr_rule_move_hws_add(struct mlx5dr_rule *rule,
 
 bool mlx5dr_rule_move_in_progress(struct mlx5dr_rule *rule);
 
+int mlx5dr_rule_create_root_no_comp(struct mlx5dr_rule *rule,
+   const struct rte_flow_item items[],
+   uint8_t num_actions,
+   struct mlx5dr_rule_action rule_actions[]);
+
+int mlx5dr_rule_destroy_root_no_comp(struct mlx5dr_rule *rule);

[PATCH 1/6] net/mlx5: update NTA rule pattern and actions flags

2024-06-02 Thread Maayan Kashani
From: Gregory Etelson 

Move the pattern flags bitmap calculation to flow_hw_list_create().
Create the actions flags bitmap in flow_hw_list_create().

The PMD uses the pattern and actions bitmaps for direct queries instead
of iterating over the arrays.
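
As a minimal sketch of what the bitmap buys (the helper name
example_rule_uses_rss() is illustrative; flow_hw_action_flags_get() and
MLX5_FLOW_ACTION_RSS come from this patch and the existing mlx5 code):

    static bool
    example_rule_uses_rss(const struct rte_flow_action actions[],
                          struct rte_flow_error *error)
    {
        uint64_t action_flags = flow_hw_action_flags_get(actions, error);

        /* One O(1) bitmap test instead of re-walking the actions array. */
        return (action_flags & MLX5_FLOW_ACTION_RSS) != 0;
    }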

Signed-off-by: Gregory Etelson 
---
 drivers/net/mlx5/mlx5_flow_hw.c | 147 +++-
 1 file changed, 126 insertions(+), 21 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c
index d938b5976a..696f675f63 100644
--- a/drivers/net/mlx5/mlx5_flow_hw.c
+++ b/drivers/net/mlx5/mlx5_flow_hw.c
@@ -568,6 +568,111 @@ flow_hw_matching_item_flags_get(const struct rte_flow_item items[])
return item_flags;
 }
 
+static uint64_t
+flow_hw_action_flags_get(const struct rte_flow_action actions[],
+struct rte_flow_error *error)
+{
+   uint64_t action_flags = 0;
+   const struct rte_flow_action *action;
+
+   for (action = actions; action->type != RTE_FLOW_ACTION_TYPE_END; action++) {
+   int type = (int)action->type;
+   switch (type) {
+   case RTE_FLOW_ACTION_TYPE_INDIRECT:
+   switch (MLX5_INDIRECT_ACTION_TYPE_GET(action->conf)) {
+   case MLX5_INDIRECT_ACTION_TYPE_RSS:
+   goto rss;
+   case MLX5_INDIRECT_ACTION_TYPE_AGE:
+   goto age;
+   case MLX5_INDIRECT_ACTION_TYPE_COUNT:
+   goto count;
+   case MLX5_INDIRECT_ACTION_TYPE_CT:
+   goto ct;
+   case MLX5_INDIRECT_ACTION_TYPE_METER_MARK:
+   goto meter;
+   default:
+   goto error;
+   }
+   break;
+   case RTE_FLOW_ACTION_TYPE_DROP:
+   action_flags |= MLX5_FLOW_ACTION_DROP;
+   break;
+   case RTE_FLOW_ACTION_TYPE_MARK:
+   action_flags |= MLX5_FLOW_ACTION_MARK;
+   break;
+   case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
+   action_flags |= MLX5_FLOW_ACTION_OF_PUSH_VLAN;
+   break;
+   case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
+   action_flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
+   break;
+   case RTE_FLOW_ACTION_TYPE_JUMP:
+   action_flags |= MLX5_FLOW_ACTION_JUMP;
+   break;
+   case RTE_FLOW_ACTION_TYPE_QUEUE:
+   action_flags |= MLX5_FLOW_ACTION_QUEUE;
+   break;
+   case RTE_FLOW_ACTION_TYPE_RSS:
+rss:
+   action_flags |= MLX5_FLOW_ACTION_RSS;
+   break;
+   case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
+   case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
+   action_flags |= MLX5_FLOW_ACTION_ENCAP;
+   break;
+   case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
+   action_flags |= MLX5_FLOW_ACTION_ENCAP;
+   break;
+   case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
+   case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
+   action_flags |= MLX5_FLOW_ACTION_DECAP;
+   break;
+   case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
+   action_flags |= MLX5_FLOW_ACTION_DECAP;
+   break;
+   case RTE_FLOW_ACTION_TYPE_SEND_TO_KERNEL:
+   action_flags |= MLX5_FLOW_ACTION_SEND_TO_KERNEL;
+   break;
+   case RTE_FLOW_ACTION_TYPE_MODIFY_FIELD:
+   action_flags |= MLX5_FLOW_ACTION_MODIFY_FIELD;
+   break;
+   case RTE_FLOW_ACTION_TYPE_PORT_ID:
+   case RTE_FLOW_ACTION_TYPE_REPRESENTED_PORT:
+   action_flags |= MLX5_FLOW_ACTION_PORT_ID;
+   break;
+   case RTE_FLOW_ACTION_TYPE_AGE:
+age:
+   action_flags |= MLX5_FLOW_ACTION_AGE;
+   break;
+   case RTE_FLOW_ACTION_TYPE_COUNT:
+count:
+   action_flags |= MLX5_FLOW_ACTION_COUNT;
+   break;
+   case RTE_FLOW_ACTION_TYPE_CONNTRACK:
+ct:
+   action_flags |= MLX5_FLOW_ACTION_CT;
+   break;
+   case RTE_FLOW_ACTION_TYPE_METER_MARK:
+meter:
+   action_flags |= MLX5_FLOW_ACTION_METER;
+   break;
+   case MLX5_RTE_FLOW_ACTION_TYPE_DEFAULT_MISS:
+   action_flags |= MLX5_FLOW_ACTION_DEFAULT_MISS;
+   break;
+   case RTE_FLOW_ACTION_TYPE_VOID:
+   case RTE_FLOW_ACTION_TYPE_END:
+ 

[PATCH 1/4] net/mlx5: reorganize main structures

2024-06-02 Thread Maayan Kashani
Reorganize the rte_flow_hw and rte_flow_nt2hws structures
for better performance, and remove the __rte_packed attribute.

Signed-off-by: Maayan Kashani 
---
 drivers/net/mlx5/mlx5_flow.h | 26 --
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 7e0f005741..5a3f047968 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -1328,12 +1328,12 @@ struct rte_flow_nt2hws {
struct rte_flow_hw_aux *flow_aux;
/** Modify header pointer. */
struct mlx5_flow_dv_modify_hdr_resource *modify_hdr;
+   /** Chain NTA flows. */
+   SLIST_ENTRY(rte_flow_hw) next;
/** Encap/decap index. */
uint32_t rix_encap_decap;
uint8_t chaned_flow;
-   /** Chain NTA flows. */
-   SLIST_ENTRY(rte_flow_hw) next;
-} __rte_packed;
+};
 
 /** HWS flow struct. */
 struct rte_flow_hw {
@@ -1345,6 +1345,12 @@ struct rte_flow_hw {
};
/** Application's private data passed to enqueued flow operation. */
void *user_data;
+   union {
+   /** Jump action. */
+   struct mlx5_hw_jump_action *jump;
+   /** TIR action. */
+   struct mlx5_hrxq *hrxq;
+   };
/** Flow index from indexed pool. */
uint32_t idx;
/** Resource index from indexed pool. */
@@ -1353,20 +1359,12 @@ struct rte_flow_hw {
uint32_t rule_idx;
/** Which flow fields (inline or in auxiliary struct) are used. */
uint32_t flags;
+   /** COUNT action index. */
+   cnt_id_t cnt_id;
/** Ongoing flow operation type. */
uint8_t operation_type;
/** Index of pattern template this flow is based on. */
uint8_t mt_idx;
-
-   /** COUNT action index. */
-   cnt_id_t cnt_id;
-   union {
-   /** Jump action. */
-   struct mlx5_hw_jump_action *jump;
-   /** TIR action. */
-   struct mlx5_hrxq *hrxq;
-   };
-
/** Equals true if it is non template rule. */
bool nt_rule;
/**
@@ -1377,7 +1375,7 @@ struct rte_flow_hw {
uint8_t padding[9];
/** HWS layer data struct. */
uint8_t rule[];
-} __rte_packed;
+};
 
 /** Auxiliary data fields that are updatable. */
 struct rte_flow_hw_aux_fields {
-- 
2.25.1


From 662c1b56b66c3d50eb3ee9911bc35b63811ae0af Mon Sep 17 00:00:00 2001
From: Maayan Kashani 
Date: Wed, 13 Mar 2024 08:45:19 +0200
Subject: [PATCH 2/4] net/mlx5: set modify header as shared action

In the current implementation, in non-template mode, the
modify header action is not set as always shared.
Align with the HWS implementation by setting the modify header action
as always shared.
Optimize the mask initialization.

Signed-off-by: Maayan Kashani 
---
 drivers/net/mlx5/mlx5_flow.h|  3 --
 drivers/net/mlx5/mlx5_flow_dv.c | 10 ++--
 drivers/net/mlx5/mlx5_flow_hw.c | 91 +++--
 3 files changed, 59 insertions(+), 45 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 5a3f047968..f5bb01616e 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -669,9 +669,6 @@ struct mlx5_flow_dv_modify_hdr_resource {
struct mlx5_list_entry entry;
void *action; /**< Modify header action object. */
uint32_t idx;
-#ifdef HAVE_MLX5_HWS_SUPPORT
-   void *mh_dr_pattern; /**< Modify header DR pattern(HWS only). */
-#endif
uint64_t flags; /**< Flags for RDMA API(HWS only). */
/* Key area for hash list matching: */
uint8_t ft_type; /**< Flow table type, Rx or Tx. */
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 3611ffa4a1..94af391894 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -6219,9 +6219,7 @@ flow_modify_create_cb(void *tool_ctx, void *cb_ctx)
uint32_t data_len = ref->actions_num * sizeof(ref->actions[0]);
uint32_t key_len = sizeof(*ref) - offsetof(typeof(*ref), ft_type);
uint32_t idx;
-   struct mlx5_tbl_multi_pattern_ctx *mpctx;
 
-   typeof(mpctx->mh) *mh_dr_pattern = ref->mh_dr_pattern;
if (unlikely(!ipool)) {
rte_flow_error_set(ctx->error, ENOMEM,
   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
@@ -6240,9 +6238,13 @@ flow_modify_create_cb(void *tool_ctx, void *cb_ctx)
key_len + data_len);
if (sh->config.dv_flow_en == 2) {
 #ifdef HAVE_MLX5_HWS_SUPPORT
+   struct mlx5dr_action_mh_pattern pattern = {
+   .sz = data_len,
+   .data = (__be64 *)ref->actions
+   };
entry->action = mlx5dr_action_create_modify_header(ctx->data2,
-   mh_dr_pattern->elements_num,
-   mh_dr_pattern->pattern, 0, ref->flags);
+   1,
+   &pattern, 0, ref->

[PATCH 1/7] net/mlx5/hws: fix bug in matcher disconnect error flow

2024-06-02 Thread Maayan Kashani
From: Yevgeny Kliteynik 

If an error happens while disconnecting the first matcher in the
list, the matcher should be reconnected back as the first matcher.

Fixes: b81f95ca770d ("net/mlx5/hws: support default miss table")
Cc: sta...@dpdk.org
Signed-off-by: Yevgeny Kliteynik 
---
 drivers/net/mlx5/hws/mlx5dr_matcher.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/hws/mlx5dr_matcher.c b/drivers/net/mlx5/hws/mlx5dr_matcher.c
index 79e7401dfd..2a84145566 100644
--- a/drivers/net/mlx5/hws/mlx5dr_matcher.c
+++ b/drivers/net/mlx5/hws/mlx5dr_matcher.c
@@ -340,7 +340,7 @@ static int mlx5dr_matcher_disconnect(struct mlx5dr_matcher *matcher)
return 0;
 
 matcher_reconnect:
-   if (LIST_EMPTY(&tbl->head))
+   if (LIST_EMPTY(&tbl->head) || prev_matcher == matcher)
LIST_INSERT_HEAD(&matcher->tbl->head, matcher, next);
else
LIST_INSERT_AFTER(prev_matcher, matcher, next);
-- 
2.25.1


From 83beeaad51e7175b846b2d61b99787ef75b340f7 Mon Sep 17 00:00:00 2001
From: Yevgeny Kliteynik 
Date: Tue, 5 Mar 2024 02:30:46 +0200
Subject: [PATCH 2/7] net/mlx5/hws: bwc - make burst threshold dynamic

The BWC rule rehash process has a burst threshold at which a rule
requires a completion. This threshold was a constant, which could make
the rehash fail whenever the threshold was actually bigger than the
queue size: the queue would end up full.
This patch makes the threshold dynamic, taking the queue size into
consideration.

Signed-off-by: Yevgeny Kliteynik 
---
 drivers/net/mlx5/hws/mlx5dr_bwc.c | 19 ++-
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/drivers/net/mlx5/hws/mlx5dr_bwc.c b/drivers/net/mlx5/hws/mlx5dr_bwc.c
index eef3053ee0..bfc7dbf100 100644
--- a/drivers/net/mlx5/hws/mlx5dr_bwc.c
+++ b/drivers/net/mlx5/hws/mlx5dr_bwc.c
@@ -22,6 +22,13 @@ mlx5dr_bwc_get_queue_id(struct mlx5dr_context *ctx, uint16_t idx)
return idx + mlx5dr_bwc_queues(ctx);
 }
 
+static uint16_t
+mlx5dr_bwc_get_burst_th(struct mlx5dr_context *ctx, uint16_t queue_id)
+{
+   return RTE_MIN(ctx->send_queue[queue_id].num_entries / 2,
+  MLX5DR_BWC_MATCHER_REHASH_BURST_TH);
+}
+
 static rte_spinlock_t *
 mlx5dr_bwc_get_queue_lock(struct mlx5dr_context *ctx, uint16_t idx)
 {
@@ -175,8 +182,9 @@ mlx5dr_bwc_queue_poll(struct mlx5dr_context *ctx,
  bool drain)
 {
bool queue_full = *pending_rules == MLX5DR_BWC_MATCHER_REHASH_QUEUE_SZ;
-   bool got_comp = *pending_rules >= MLX5DR_BWC_MATCHER_REHASH_BURST_TH;
struct rte_flow_op_result comp[MLX5DR_BWC_MATCHER_REHASH_BURST_TH];
+   uint16_t burst_th = mlx5dr_bwc_get_burst_th(ctx, queue_id);
+   bool got_comp = *pending_rules >= burst_th;
int ret;
int i;
 
@@ -185,8 +193,7 @@ mlx5dr_bwc_queue_poll(struct mlx5dr_context *ctx,
return 0;
 
while (queue_full || ((got_comp || drain) && *pending_rules)) {
-   ret = mlx5dr_send_queue_poll(ctx, queue_id, comp,
-                                MLX5DR_BWC_MATCHER_REHASH_BURST_TH);
+   ret = mlx5dr_send_queue_poll(ctx, queue_id, comp, burst_th);
if (unlikely(ret < 0)) {
DR_LOG(ERR, "Rehash error: polling queue %d returned %d\n",
   queue_id, ret);
@@ -583,6 +590,7 @@ mlx5dr_bwc_matcher_move_all(struct mlx5dr_bwc_matcher *bwc_matcher)
struct mlx5dr_bwc_rule **bwc_rules;
struct mlx5dr_rule_attr rule_attr;
uint32_t *pending_rules;
+   uint16_t burst_th;
bool all_done;
int i, j, ret;
 
@@ -617,9 +625,10 @@ mlx5dr_bwc_matcher_move_all(struct mlx5dr_bwc_matcher *bwc_matcher)
 
for (i = 0; i < bwc_queues; i++) {
rule_attr.queue_id = mlx5dr_bwc_get_queue_id(ctx, i);
+   burst_th = mlx5dr_bwc_get_burst_th(ctx, rule_attr.queue_id);
 
-   for (j = 0; j < MLX5DR_BWC_MATCHER_REHASH_BURST_TH && bwc_rules[i]; j++) {
-   rule_attr.burst = !!((j + 1) % MLX5DR_BWC_MATCHER_REHASH_BURST_TH);
+   for (j = 0; j < burst_th && bwc_rules[i]; j++) {
+   rule_attr.burst = !!((j + 1) % burst_th);
ret = mlx5dr_matcher_resize_rule_move(bwc_matcher->matcher,
                                      bwc_rules[i]->rule,
                                      &rule_attr);
-- 
2.25.1


From d056420b64d44928a5596b3965f6e9a89699ace4 Mon Sep 17 00:00:00 2001
From: Yevgeny Kliteynik 
Date: Tue, 5 Mar 2024 01:26:15 +0200
Subject: [PATCH 3/7] net/mlx5/hws: bwc - adding rules with larger action
 template

The current BWC implementation doesn't support the case where a rule is
added with an AT that requires more action STEs than any other AT that
already exists on this matcher.
This patch adds t

[PATCH] app/testpmd: support retrying device stop

2024-06-02 Thread Maayan Kashani
From: Dariusz Sosnowski 

In some cases rte_eth_dev_stop() can fail with the EBUSY error code,
meaning that the port cannot be stopped because other resources are
still referencing it; the port must be stopped again after those
resources are freed.

This patch adds handling of the EBUSY error code on port stop to testpmd.
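
For illustration, a "port stop all" session on a setup where port 0 is
still referenced by another port's resources could then look roughly
like the following (output text taken from the messages added by this
patch; the exact port numbers and ordering are illustrative):

    testpmd> port stop all
    Stopping ports...
    Stopping port 0 will be retried.
    Port 1 is stopped
    Port 0 is stopped
    Done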

Signed-off-by: Dariusz Sosnowski 
---
 app/test-pmd/testpmd.c | 47 +++---
 1 file changed, 39 insertions(+), 8 deletions(-)

diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 70ea257fda..2d4d583cdf 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -3361,6 +3361,7 @@ stop_port(portid_t pid)
int need_check_link_status = 0;
portid_t peer_pl[RTE_MAX_ETHPORTS];
int peer_pi;
+   bool left_ebusy = false;
int ret;
 
if (port_id_is_invalid(pid, ENABLED_WARN))
@@ -3412,12 +3413,25 @@ stop_port(portid_t pid)
port_flow_flush(pi);
 
ret = eth_dev_stop_mp(pi);
-   if (ret != 0) {
-			RTE_LOG(ERR, EAL, "rte_eth_dev_stop failed for port %u\n",
-				pi);
+   if (ret == -EBUSY) {
+   if (pid == (portid_t)RTE_PORT_ALL) {
+   left_ebusy = true;
+   printf("Stopping port %u will be retried.\n", 
pi);
+   } else {
+   printf("rte_eth_dev_stop failed for port %u. "
+  "It can be stopped again after related 
ports "
+  "are stopped\n", pi);
+   }
+   /* Allow to retry stopping the port. */
+   port->port_status = RTE_PORT_STARTED;
+   continue;
+   } else if (ret != 0) {
+   printf("rte_eth_dev_stop failed for port %u\n", pi);
/* Allow to retry stopping the port. */
port->port_status = RTE_PORT_STARTED;
continue;
+   } else {
+   printf("Port %u is stopped\n", pi);
}
 
if (port->port_status == RTE_PORT_HANDLING)
@@ -3427,6 +3441,27 @@ stop_port(portid_t pid)
pi);
need_check_link_status = 1;
}
+   if (left_ebusy) {
+   RTE_ETH_FOREACH_DEV(pi) {
+   port = &ports[pi];
+
+   if (port->port_status == RTE_PORT_STARTED)
+   port->port_status = RTE_PORT_HANDLING;
+   else
+   continue;
+
+   if (eth_dev_stop_mp(pi) != 0)
+   printf("rte_eth_dev_stop failed for port %u\n", 
pi);
+   else
+   printf("Port %u is stopped\n", pi);
+
+   if (port->port_status == RTE_PORT_HANDLING)
+   port->port_status = RTE_PORT_STOPPED;
+   else
+				fprintf(stderr, "Port %d can not be set into stopped\n",
+					pi);
+   }
+   }
if (need_check_link_status && !no_link_check)
check_all_ports_link_status(RTE_PORT_ALL);
 
@@ -3797,11 +3832,7 @@ pmd_test_exit(void)
 #endif
if (ports != NULL) {
no_link_check = 1;
-   RTE_ETH_FOREACH_DEV(pt_id) {
-   printf("\nStopping port %d...\n", pt_id);
-   fflush(stdout);
-   stop_port(pt_id);
-   }
+   stop_port(RTE_PORT_ALL);
RTE_ETH_FOREACH_DEV(pt_id) {
printf("\nShutting down port %d...\n", pt_id);
fflush(stdout);
-- 
2.25.1



[PATCH] net/mlx5: fix non-template HWS RSS action configuration

2024-06-02 Thread Maayan Kashani
From: Gregory Etelson 

SWS mode checks flow items to adjust RSS action configuration.

HWS mode relies on the application to provide the correct
RSS configuration.

This patch allows the PMD in non-template HWS mode to reconfigure the
RSS action according to the items.
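
As an illustrative example (testpmd syntax; port, group and type values
are arbitrary), a non-template rule such as

    flow create 0 ingress pattern eth / ipv6 / end actions rss types ipv4 ipv6 end / end

combines an IPv6 pattern with an IPv4 hash type; with this patch the
PMD clears the IPv4 layer types from the derived RSS configuration so
that the IPv6 hash can actually be activated by the hardware.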

Fixes: b73a84345231 ("net/mlx5: initial design of non template to hws")
Signed-off-by: Gregory Etelson 
---
Depends-on: series-196920 ("net/mlx5: initial design of non template to hws")
---
 drivers/net/mlx5/mlx5_nta_rss.c | 31 ---
 1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_nta_rss.c b/drivers/net/mlx5/mlx5_nta_rss.c
index 1f0085ff06..56368f5efa 100644
--- a/drivers/net/mlx5/mlx5_nta_rss.c
+++ b/drivers/net/mlx5/mlx5_nta_rss.c
@@ -436,7 +436,7 @@ flow_nta_handle_rss(struct rte_eth_dev *dev,
struct rte_flow_error *error)
 {
struct rte_flow_hw *rss_base = NULL, *rss_next = NULL, *rss_miss = NULL;
-   struct rte_flow_action_rss ptype_rss_conf;
+   struct rte_flow_action_rss ptype_rss_conf = *rss_conf;
struct mlx5_nta_rss_ctx rss_ctx;
uint64_t rss_types = rte_eth_rss_hf_refine(rss_conf->types);
bool inner_rss = rss_conf->level > 1;
@@ -478,6 +478,7 @@ flow_nta_handle_rss(struct rte_eth_dev *dev,
	[MLX5_RSS_PTYPE_ACTION_INDEX + 1] = { .type = RTE_FLOW_ACTION_TYPE_END }
};
 
+   ptype_rss_conf.types = rss_types;
if (l4_item) {
/*
 * Original flow pattern extended up to L4 level.
@@ -500,10 +501,27 @@ flow_nta_handle_rss(struct rte_eth_dev *dev,
/*
 * Original flow pattern extended up to L3 level.
 * RSS action was not set for L4 hash.
-* Original pattern does not need expansion.
 */
-			rte_flow_error_set(error, 0, RTE_FLOW_ERROR_TYPE_NONE, NULL, NULL);
-			return NULL;
+			bool ip6_item =
+				(outer_rss && (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6)) ||
+				(inner_rss && (item_flags & MLX5_FLOW_LAYER_INNER_L3_IPV6));
+   bool ip6_hash = rss_types & RTE_ETH_RSS_IPV6;
+
+   if (ip6_item && ip6_hash) {
+   /*
+* MLX5 HW will not activate TIR IPv6 hash
+* if that TIR has also IPv4 hash
+*/
+   ptype_rss_conf.types &= ~MLX5_IPV4_LAYER_TYPES;
+   } else {
+   /*
+* The original pattern does not need expansion.
+*/
+   rte_flow_error_set(error, 0,
+  RTE_FLOW_ERROR_TYPE_NONE,
+  NULL, NULL);
+   return NULL;
+   }
}
}
/* Create RSS expansions in dedicated PTYPE flow group */
@@ -513,11 +531,10 @@ flow_nta_handle_rss(struct rte_eth_dev *dev,
   NULL, "cannot get RSS PTYPE group");
return NULL;
}
-   ptype_rss_conf = *rss_conf;
mlx5_nta_rss_init_ptype_ctx(&rss_ctx, dev, &ptype_attr, ptype_pattern,
-				    ptype_actions, rss_conf, &expansion_head,
+				    ptype_actions, &ptype_rss_conf, &expansion_head,
error, item_flags, flow_type, external);
-	rss_miss = mlx5_hw_rss_ptype_create_miss_flow(dev, rss_conf, ptype_attr.group,
+	rss_miss = mlx5_hw_rss_ptype_create_miss_flow(dev, &ptype_rss_conf, ptype_attr.group,
  external, error);
if (!rss_miss)
goto error;
-- 
2.25.1



Re: [RFC] eal: provide option to use compiler memcpy instead of RTE

2024-06-02 Thread Mattias Rönnblom

On 2024-05-29 23:56, Stephen Hemminger wrote:

On Mon, 27 May 2024 13:11:51 +0200
Mattias Rönnblom  wrote:


#ifdef RTE_USE_CC_MEMCPY
+static inline void
+rte_mov16(uint8_t *dst, const uint8_t *src)
+{
+   memcpy(dst, src, 16);
+}
+
+static inline void
+rte_mov32(uint8_t *dst, const uint8_t *src)
+{
+   memcpy(dst, src, 32);
+}
+
+static inline void
+rte_mov48(uint8_t *dst, const uint8_t *src)
+{
+   memcpy(dst, src, 48);
+}
+
+static inline void
+rte_mov64(uint8_t *dst, const uint8_t *src)
+{
+   memcpy(dst, src, 64);
+}
+
+static inline void
+rte_mov128(uint8_t *dst, const uint8_t *src)
+{
+   memcpy(dst, src, 128);
+}
+
+static inline void
+rte_mov256(uint8_t *dst, const uint8_t *src)
+{
+   memcpy(dst, src, 256);
+}
+
+static inline void *
+rte_memcpy(void *dst, const void *src, size_t n)
+{
+   return memcpy(dst, src, n);
+}
+#endif /* RTE_USE_CC_MEMCPY */
+
+#ifdef __cplusplus
+}
+#endif


You may need to make these macros to fully engage the checking
options of GCC, fortify, coverity etc. Not sure if all the tools
are smart enough to see through an inline.


At least GCC is, provided you compile with optimization enabled. That 
goes for both the overlapping-memcpy() warning and static buffer overruns.
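
For instance, a minimal test case along these lines (illustrative only,
not part of the patch) typically gets a -Wstringop-overflow warning from
GCC at -O2, even though the bad size only becomes visible after the
inline wrapper has been folded in:

    #include <string.h>

    static inline void *wrap_memcpy(void *dst, const void *src, size_t n)
    {
        return memcpy(dst, src, n); /* same shape as the cc-memcpy rte_memcpy() */
    }

    int main(void)
    {
        char dst[8];
        static const char src[16] = "0123456789abcde";

        wrap_memcpy(dst, src, sizeof(src)); /* overruns dst[]; GCC -O2 warns */
        return dst[0];
    }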


clang doesn't warn about overlapping memcpy() and fails to follow 
function calls, even with optimization enabled, seemingly. Same for ICX.


Static analysis tools that can't beat the compiler seem like they would 
be of limited use.


With macros you'll lose the type checking, which in the rte_memcpy() 
case doesn't matter, but in the rte_mov*() case it does.


I'm sure changing this to macros would break something, although it would 
be good if clang would also generate a warning (for application code 
using <rte_memcpy.h>).


Re: [RFC v2] eal: provide option to use compiler memcpy instead of RTE

2024-06-02 Thread Mattias Rönnblom

On 2024-05-31 18:50, Stephen Hemminger wrote:

On Fri, 31 May 2024 07:19:41 +0200
Mattias Rönnblom  wrote:


On 2024-05-28 17:09, Bruce Richardson wrote:

On Tue, May 28, 2024 at 07:59:36AM -0700, Stephen Hemminger wrote:

On Tue, 28 May 2024 10:19:15 +0200
Mattias Rönnblom  wrote:
  
  


I've tested this patch some with DSW micro benchmarks, and the result is
a 2.5% reduction of the DSW+testapp overhead with cc/libc memcpy. GCC 11.4.

We've also run characteristic test suite of a large, real world app.
Here, we saw no effect. GCC 10.5.

x86_64 in both cases (Skylake and Raptor Lake).

Last time we did the same, there was a noticeable performance
degradation in both of the above cases.

This is not a lot of data points, but I think we should consider
making the custom RTE memcpy() implementations optional in the next
release, and if no-one complains, remove the implementations in the next
release.


Lets go farther.

1. Announce that rte_memcpy will be marked deprecated in 24.11 release

2. In 24.11 do a global replace of rte_memcpy on the tree.
 And mark rte_memcpy as deprecated.

3. In 25.11 it can go away.


While I'd like us to be able to do so, I believe that to be premature. We
need to see where/if there are regressions first, and see about fixing
them.

/Bruce


Should I turn this RFC into a PATCH?

Is use_cc_memcpy a good name for the configuration parameter?



I did a slightly more direct test and found a couple of things:
1. The ena driver is redefining memcpy as rte_memcpy; this should be removed
   and should have been blocked during code review.


Wouldn't that hack continue to work? Provided rte_memcpy() is a 
function, and the <rte_memcpy.h> header is included prior to the memcpy 
redefinition.



2. A couple of drivers are implicitly expecting SIMD vector routines to be
   available. This works because rte_memcpy.h includes rte_vect.h. The fix
   is to have these places include rte_vect.h.



I noticed this as well. I'll add patches for those drivers.


[RFC v3 0/5] Optionally have rte_memcpy delegate to compiler memcpy

2024-06-02 Thread Mattias Rönnblom
This patch set makes DPDK library, driver, and application code use the
compiler/libc memcpy() by default when functions in <rte_memcpy.h> are
invoked.

The various custom DPDK rte_memcpy() implementations may be retained
by means of a build-time option.

This patch set only makes a difference on x86, PPC and ARM. LoongArch
and RISC-V already use the compiler/libc memcpy().

Mattias Rönnblom (5):
  event/dlb2: include headers for vector and memory copy APIs
  net/octeon_ep: properly include vector API header file
  distributor: properly include vector API header file
  fib: properly include vector API header file
  eal: provide option to use compiler memcpy instead of RTE

 config/meson.build |  1 +
 drivers/event/dlb2/dlb2.c  |  2 +
 drivers/net/octeon_ep/otx_ep_ethdev.c  |  2 +
 lib/distributor/rte_distributor.c  |  1 +
 lib/eal/arm/include/rte_memcpy.h   | 10 +
 lib/eal/include/generic/rte_memcpy.h   | 61 +++---
 lib/eal/loongarch/include/rte_memcpy.h | 53 ++
 lib/eal/ppc/include/rte_memcpy.h   | 10 +
 lib/eal/riscv/include/rte_memcpy.h | 53 ++
 lib/eal/x86/include/meson.build|  1 +
 lib/eal/x86/include/rte_memcpy.h   | 11 -
 lib/fib/trie.c |  1 +
 meson_options.txt  |  2 +
 13 files changed, 102 insertions(+), 106 deletions(-)

-- 
2.34.1



[RFC v3 1/5] event/dlb2: include headers for vector and memory copy APIs

2024-06-02 Thread Mattias Rönnblom
The DLB2 PMD depended on <rte_vect.h> being included as a side effect
of <rte_memcpy.h> being included.

In addition, DLB2 used rte_memcpy() but did not include <rte_memcpy.h>,
but rather depended on other include files to do so.

This patch addresses both of those issues.

Signed-off-by: Mattias Rönnblom 
---
 drivers/event/dlb2/dlb2.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/event/dlb2/dlb2.c b/drivers/event/dlb2/dlb2.c
index 0b91f03956..19f90b8f8d 100644
--- a/drivers/event/dlb2/dlb2.c
+++ b/drivers/event/dlb2/dlb2.c
@@ -25,11 +25,13 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
 #include 
 #include 
+#include 
 
 #include "dlb2_priv.h"
 #include "dlb2_iface.h"
-- 
2.34.1



[RFC v3 2/5] net/octeon_ep: properly include vector API header file

2024-06-02 Thread Mattias Rönnblom
The octeon_ep driver relied on <rte_vect.h>, but failed to provide a
direct include of this file.

Signed-off-by: Mattias Rönnblom 
---
 drivers/net/octeon_ep/otx_ep_ethdev.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/octeon_ep/otx_ep_ethdev.c b/drivers/net/octeon_ep/otx_ep_ethdev.c
index 46211361a0..b069216629 100644
--- a/drivers/net/octeon_ep/otx_ep_ethdev.c
+++ b/drivers/net/octeon_ep/otx_ep_ethdev.c
@@ -5,6 +5,8 @@
 #include 
 #include 
 
+#include <rte_vect.h>
+
 #include "otx_ep_common.h"
 #include "otx_ep_vf.h"
 #include "otx2_ep_vf.h"
-- 
2.34.1



[RFC v3 4/5] fib: properly include vector API header file

2024-06-02 Thread Mattias Rönnblom
The trie implementation of the fib library relied on <rte_vect.h>, but
failed to provide a direct include of this file.

Signed-off-by: Mattias Rönnblom 
---
 lib/fib/trie.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/fib/trie.c b/lib/fib/trie.c
index 09470e7287..74db8863df 100644
--- a/lib/fib/trie.c
+++ b/lib/fib/trie.c
@@ -9,6 +9,7 @@
 #include 
 #include 
 #include 
+#include <rte_vect.h>
 
 #include 
 #include 
-- 
2.34.1



[RFC v3 3/5] distributor: properly include vector API header file

2024-06-02 Thread Mattias Rönnblom
The distributor library relied on <rte_vect.h>, but failed to provide
a direct include of this file.

Signed-off-by: Mattias Rönnblom 
---
 lib/distributor/rte_distributor.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/distributor/rte_distributor.c b/lib/distributor/rte_distributor.c
index e58727cdc2..1389efc03f 100644
--- a/lib/distributor/rte_distributor.c
+++ b/lib/distributor/rte_distributor.c
@@ -15,6 +15,7 @@
 #include 
 #include 
 #include 
+#include <rte_vect.h>
 
 #include "rte_distributor.h"
 #include "rte_distributor_single.h"
-- 
2.34.1



[RFC v3 5/5] eal: provide option to use compiler memcpy instead of RTE

2024-06-02 Thread Mattias Rönnblom
Provide a build option to have functions in <rte_memcpy.h> delegate to
the standard compiler/libc memcpy(), instead of using the various
custom DPDK, handcrafted, per-architecture rte_memcpy()
implementations.

A new meson build option 'use_cc_memcpy' is added. By default,
the compiler/libc memcpy() is used.

The performance benefits of the custom DPDK rte_memcpy()
implementations have been diminishing with every compiler release, and
with current toolchains the usage of a custom memcpy() implementation
may even be a liability.

An additional benefit of this change is that compilers and static
analysis tools have an easier time detecting incorrect usage of
memcpy() (e.g., buffer overruns, or overlapping source and destination
buffers).

This patch makes DPDK and DPDK applications using <rte_memcpy.h> use
the compiler/libc memcpy() by default, but leaves an option to stay on
the custom DPDK implementations, should that prove beneficial for
certain applications or architectures.
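
For example (illustrative invocations; the only assumption beyond this
patch is that 'use_cc_memcpy' is a boolean meson option):

    meson setup build                         # default: compiler/libc memcpy()
    meson setup build -Duse_cc_memcpy=false   # keep the custom rte_memcpy()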

RFC v3:
 o Fix missing #endif on LoongArch.
 o PPC and RISC-V now implemented, meaning all architectures are supported.
 o Unnecessary <rte_vect.h> include is removed from <rte_memcpy.h>.

RFC v2:
 * Fix bug where rte_memcpy.h was not installed on x86.
 * Made attempt to make Loongarch compile.

Signed-off-by: Mattias Rönnblom 
---
 config/meson.build |  1 +
 lib/eal/arm/include/rte_memcpy.h   | 10 +
 lib/eal/include/generic/rte_memcpy.h   | 61 +++---
 lib/eal/loongarch/include/rte_memcpy.h | 53 ++
 lib/eal/ppc/include/rte_memcpy.h   | 10 +
 lib/eal/riscv/include/rte_memcpy.h | 53 ++
 lib/eal/x86/include/meson.build|  1 +
 lib/eal/x86/include/rte_memcpy.h   | 11 -
 meson_options.txt  |  2 +
 9 files changed, 96 insertions(+), 106 deletions(-)

diff --git a/config/meson.build b/config/meson.build
index 8c8b019c25..456056628e 100644
--- a/config/meson.build
+++ b/config/meson.build
@@ -353,6 +353,7 @@ endforeach
 # set other values pulled from the build options
 dpdk_conf.set('RTE_MAX_ETHPORTS', get_option('max_ethports'))
 dpdk_conf.set('RTE_LIBEAL_USE_HPET', get_option('use_hpet'))
+dpdk_conf.set('RTE_USE_CC_MEMCPY', get_option('use_cc_memcpy'))
 dpdk_conf.set('RTE_ENABLE_STDATOMIC', get_option('enable_stdatomic'))
 dpdk_conf.set('RTE_ENABLE_TRACE_FP', get_option('enable_trace_fp'))
 dpdk_conf.set('RTE_PKTMBUF_HEADROOM', get_option('pkt_mbuf_headroom'))
diff --git a/lib/eal/arm/include/rte_memcpy.h b/lib/eal/arm/include/rte_memcpy.h
index 47dea9a8cc..e8aff722df 100644
--- a/lib/eal/arm/include/rte_memcpy.h
+++ b/lib/eal/arm/include/rte_memcpy.h
@@ -5,10 +5,20 @@
 #ifndef _RTE_MEMCPY_ARM_H_
 #define _RTE_MEMCPY_ARM_H_
 
+#include 
+
+#ifdef RTE_USE_CC_MEMCPY
+
+#include 
+
+#else
+
 #ifdef RTE_ARCH_64
 #include 
 #else
 #include 
 #endif
 
+#endif /* RTE_USE_CC_MEMCPY */
+
 #endif /* _RTE_MEMCPY_ARM_H_ */
diff --git a/lib/eal/include/generic/rte_memcpy.h b/lib/eal/include/generic/rte_memcpy.h
index e7f0f8eaa9..cae06117fb 100644
--- a/lib/eal/include/generic/rte_memcpy.h
+++ b/lib/eal/include/generic/rte_memcpy.h
@@ -5,12 +5,19 @@
 #ifndef _RTE_MEMCPY_H_
 #define _RTE_MEMCPY_H_
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * @file
  *
  * Functions for vectorised implementation of memcpy().
  */
 
+#include 
+#include 
+
 /**
  * Copy 16 bytes from one location to another using optimised
  * instructions. The locations should not overlap.
@@ -35,8 +42,6 @@ rte_mov16(uint8_t *dst, const uint8_t *src);
 static inline void
 rte_mov32(uint8_t *dst, const uint8_t *src);
 
-#ifdef __DOXYGEN__
-
 /**
  * Copy 48 bytes from one location to another using optimised
  * instructions. The locations should not overlap.
@@ -49,8 +54,6 @@ rte_mov32(uint8_t *dst, const uint8_t *src);
 static inline void
 rte_mov48(uint8_t *dst, const uint8_t *src);
 
-#endif /* __DOXYGEN__ */
-
 /**
  * Copy 64 bytes from one location to another using optimised
  * instructions. The locations should not overlap.
@@ -87,8 +90,6 @@ rte_mov128(uint8_t *dst, const uint8_t *src);
 static inline void
 rte_mov256(uint8_t *dst, const uint8_t *src);
 
-#ifdef __DOXYGEN__
-
 /**
  * Copy bytes from one location to another. The locations must not overlap.
  *
@@ -111,6 +112,52 @@ rte_mov256(uint8_t *dst, const uint8_t *src);
 static void *
 rte_memcpy(void *dst, const void *src, size_t n);
 
-#endif /* __DOXYGEN__ */
+#ifdef RTE_USE_CC_MEMCPY
+static inline void
+rte_mov16(uint8_t *dst, const uint8_t *src)
+{
+   memcpy(dst, src, 16);
+}
+
+static inline void
+rte_mov32(uint8_t *dst, const uint8_t *src)
+{
+   memcpy(dst, src, 32);
+}
+
+static inline void
+rte_mov48(uint8_t *dst, const uint8_t *src)
+{
+   memcpy(dst, src, 48);
+}
+
+static inline void
+rte_mov64(uint8_t *dst, const uint8_t *src)
+{
+   memcpy(dst, src, 64);
+}
+
+static inline void
+rte_mov128(uint8_t *dst, const uint8_t *src)
+{
+   memcpy(dst, src, 128);
+}
+
+static inline void
+rte_mov256(uint8_t *dst, co

RE: [RFC v3 5/5] eal: provide option to use compiler memcpy instead of RTE

2024-06-02 Thread Morten Brørup
> From: Mattias Rönnblom [mailto:mattias.ronnb...@ericsson.com]
> Sent: Sunday, 2 June 2024 14.39
> 
> Provide a build option to have functions in <rte_memcpy.h> delegate to
> the standard compiler/libc memcpy(), instead of using the various
> custom DPDK, handcrafted, per-architecture rte_memcpy()
> implementations.
> 
> A new meson build option 'use_cc_memcpy' is added. By default,
> the compiler/libc memcpy() is used.
> 
> The performance benefits of the custom DPDK rte_memcpy()
> implementations have been diminishing with every compiler release, and
> with current toolchains the usage of a custom memcpy() implementation
> may even be a liability.
> 
> An additional benefit of this change is that compilers and static
> analysis tools have an easier time detecting incorrect usage of
> memcpy() (e.g., buffer overruns, or overlapping source and destination
> buffers).
> 
> This patch makes DPDK and DPDK applications using <rte_memcpy.h> use
> compiler/libc memcpy() by default, but leaves an option to stay on the
> custom DPDK implementations, would that prove beneficial for certain
> applications or architectures.
> 
> RFC v3:
>  o Fix missing #endif on loongarch.
>  o PPC and RISCV now implemented, meaning all architectures are
> supported.
>  o Unnecessary <rte_vect.h> include is removed from <rte_memcpy.h>.
> 
> RFC v2:
>  * Fix bug where rte_memcpy.h was not installed on x86.
>  * Made attempt to make Loongarch compile.
> 
> Signed-off-by: Mattias Rönnblom 
> ---

We should keep pushing DPDK forward and cleaning up old cruft along the way.

The memcpy discussion has convinced me that:
1. This change is a good idea, and
2. Mainstream compilers are sufficiently mature to do it now.

So, for the series,
Acked-by: Morten Brørup 

>  static inline void
>  rte_mov32(uint8_t *dst, const uint8_t *src);

While at it, would it be somehow beneficial to change these from uint8_t* to 
void* or char* (keeping const where relevant)?



Re: [PATCH] net/hns3: fix Rx packet truncation when KEEP CRC enabled

2024-06-02 Thread Jie Hai

On 2024/3/1 19:10, Ferruh Yigit wrote:

On 3/1/2024 6:55 AM, huangdengdui wrote:



On 2024/2/29 17:25, Ferruh Yigit wrote:

On 2/29/2024 3:58 AM, huangdengdui wrote:



On 2024/2/28 21:07, Ferruh Yigit wrote:

On 2/28/2024 2:27 AM, huangdengdui wrote:



On 2024/2/27 0:43, Ferruh Yigit wrote:

On 2/26/2024 3:16 AM, Jie Hai wrote:

On 2024/2/23 21:53, Ferruh Yigit wrote:

On 2/20/2024 3:58 AM, Jie Hai wrote:

Hi, Ferruh,

Thanks for your review.

On 2024/2/7 22:15, Ferruh Yigit wrote:

On 2/6/2024 1:10 AM, Jie Hai wrote:

From: Dengdui Huang 

When KEEP_CRC offload is enabled, some packets will be truncated and
the CRC is still stripped in the following cases:
1. For HIP08 hardware, the packet type is TCP and the length
  is less than or equal to 60B.
2. For other hardware, the packet type is IP and the length
  is less than or equal to 60B.



If a device doesn't support the offload for some packets, it can be an
option to disable the offload for that device, instead of calculating
the CRC in software and appending it.


The KEEP CRC feature of hns3 is faulty only in the specific packet
type and small packet (<60B) case.
What's more, small Ethernet packets are not common.


Unless you have a specific usecase, or requirement to support the
offload.


Yes, some users of hns3 are already using this feature.
So we cannot drop this offload


<...>


@@ -2492,10 +2544,16 @@ hns3_recv_pkts_simple(void *rx_queue,
    goto pkt_err;
      rxm->packet_type = hns3_rx_calc_ptype(rxq, l234_info,
ol_info);
-
    if (rxm->packet_type == RTE_PTYPE_L2_ETHER_TIMESYNC)
    rxm->ol_flags |= RTE_MBUF_F_RX_IEEE1588_PTP;
    +    if (unlikely(rxq->crc_len > 0)) {
+    if (hns3_need_recalculate_crc(rxq, rxm))
+    hns3_recalculate_crc(rxq, rxm);
+    rxm->pkt_len -= rxq->crc_len;
+    rxm->data_len -= rxq->crc_len;



Removing 'crc_len' from 'mbuf->pkt_len' & 'mbuf->data_len' is
practically same as stripping CRC.

We don't count CRC length in the statistics, but it should be
accessible
in the payload by the user.

Our drivers are behaving exactly as you say.



If so I missed why mbuf 'pkt_len' and 'data_len' reduced by
'rxq->crc_len', can you please explain what above lines does?



@@ -2470,8 +2523,7 @@ hns3_recv_pkts_simple(void *rx_queue,
  rxdp->rx.bd_base_info = 0;

  rxm->data_off = RTE_PKTMBUF_HEADROOM;
-    rxm->pkt_len = (uint16_t)(rte_le_to_cpu_16(rxd.rx.pkt_len)) -
-    rxq->crc_len;
+    rxm->pkt_len = rte_le_to_cpu_16(rxd.rx.pkt_len);

In the previous code above, the 'pkt_len' is set to the length obtained
from the BD. The length obtained from the BD already contains the CRC
length. But as you said above, DPDK requires that the length of the
mbuf does not contain the CRC length. So we subtract 'rxq->crc_len'
from the mbuf's 'pkt_len' and 'data_len'. This patch doesn't change
the logic, it just moves the code around.



Nope, I am not saying mbuf length shouldn't contain CRC length, indeed
it is other way around and this is our confusion.

CRC length shouldn't be in the statistics, I mean in received bytes stats.
Assume that received packet is 128 bytes and we know it has the CRC,
Rx received bytes stat should be 124 (rx_bytes = 128 - CRC = 124)

But mbuf->data_len & mbuf->pkt_len should have full frame length,
including CRC.

As application explicitly requested to KEEP CRC, it will know last 4
bytes are CRC.
Anything after 'mbuf->data_len' in the mbuf buffer is not valid, so if
you reduce 'mbuf->data_len' by CRC size, application can't know if 4
bytes after 'mbuf->data_len' is valid CRC or not.


I agree with you.

But the implementation of other PMDs that support KEEP_CRC is like this.
In addition, there are probably many users already relying on it.
If we modify it, it may cause application incompatibilities.

what do you think?


This is documented in the ethdev [1]; better to follow the documentation
for all PMDs. Can you please highlight the relevant driver code, so we can
discuss it with their maintainers.

Alternatively, we can document this additionally in the KEEP_CRC feature
documentation if it helps the applications.


[1]
https://git.dpdk.org/dpdk/tree/lib/ethdev/rte_ethdev.h?h=v23.11#n257


Currently, this documentation does not describe whether pkt_len and
data_len should contain crc_len.



I think it is clear that pkt_len and data_len should contain crc_len; we
can ask for more comments.

This patch doesn't change the logic of the hns3 PMD, and the implementation
of other PMDs that support KEEP_CRC is like the hns3 PMD's. Can we merge
this patch first?



If hns3 is behaving against the documented behavior, I don't understand why
you are pushing to merge this patch instead of fixing it.





Other drivers' behavior is something else, not directly related to this
patch; but again, if you can provide references, we can discuss with their
maintainers.


Hi, all maintainers,
We need your opinions on whether pkt_len and data_len 

Re: [PATCH] net/hns3: fix Rx packet truncation when KEEP CRC enabled

2024-06-02 Thread Stephen Hemminger
On Mon, 3 Jun 2024 09:38:19 +0800
Jie Hai  wrote:

> On 2024/3/1 19:10, Ferruh Yigit wrote:
> > On 3/1/2024 6:55 AM, huangdengdui wrote:  
> >>
> >>
> >> On 2024/2/29 17:25, Ferruh Yigit wrote:  
> >>> On 2/29/2024 3:58 AM, huangdengdui wrote:  
> 
> 
>  On 2024/2/28 21:07, Ferruh Yigit wrote:  
> > On 2/28/2024 2:27 AM, huangdengdui wrote:  
> >>
> >>
> >> On 2024/2/27 0:43, Ferruh Yigit wrote:  
> >>> On 2/26/2024 3:16 AM, Jie Hai wrote:  
>  On 2024/2/23 21:53, Ferruh Yigit wrote:  
> > On 2/20/2024 3:58 AM, Jie Hai wrote:  
> >> Hi, Ferruh,
> >>
> >> Thanks for your review.
> >>
> >> On 2024/2/7 22:15, Ferruh Yigit wrote:  
> >>> On 2/6/2024 1:10 AM, Jie Hai wrote:  
>  From: Dengdui Huang 
> 
>  When KEEP_CRC offload is enabled, some packets will be truncated 
>  and
>  the CRC is still be stripped in following cases:
>  1. For HIP08 hardware, the packet type is TCP and the length
>    is less than or equal to 60B.
>  2. For other hardwares, the packet type is IP and the length
>    is less than or equal to 60B.
>   
> >>>
> >>> If a device doesn't support the offload by some packets, it can be
> >>> option to disable offload for that device, instead of calculating 
> >>> it in
> >>> software and append it.  
> >>
> >> The KEEP CRC feature of hns3 is faulty only in the specific packet
> >> type and small packet(<60B) case.
> >> What's more, the small ethernet packet is not common.
> >>  
> >>> Unless you have a specific usecase, or requirement to support the
> >>> offload.  
> >>
> >> Yes, some users of hns3 are already using this feature.
> >> So we cannot drop this offload
> >>  
> >>> <...>
> >>>  
>  @@ -2492,10 +2544,16 @@ hns3_recv_pkts_simple(void *rx_queue,
>      goto pkt_err;
>        rxm->packet_type = hns3_rx_calc_ptype(rxq, 
>  l234_info,
>  ol_info);
>  -
>      if (rxm->packet_type == RTE_PTYPE_L2_ETHER_TIMESYNC)
>      rxm->ol_flags |= RTE_MBUF_F_RX_IEEE1588_PTP;
>      +    if (unlikely(rxq->crc_len > 0)) {
>  +    if (hns3_need_recalculate_crc(rxq, rxm))
>  +    hns3_recalculate_crc(rxq, rxm);
>  +    rxm->pkt_len -= rxq->crc_len;
>  +    rxm->data_len -= rxq->crc_len;
>   
> >>>
> >>> Removing 'crc_len' from 'mbuf->pkt_len' & 'mbuf->data_len' is
> >>> practically same as stripping CRC.
> >>>
> >>> We don't count CRC length in the statistics, but it should be
> >>> accessible
> >>> in the payload by the user.  
> >> Our drivers are behaving exactly as you say.
> >>  
> >
> > If so I missed why mbuf 'pkt_len' and 'data_len' reduced by
> > 'rxq->crc_len', can you please explain what above lines does?
> >
> >  
>  @@ -2470,8 +2523,7 @@ hns3_recv_pkts_simple(void *rx_queue,
>    rxdp->rx.bd_base_info = 0;
> 
>    rxm->data_off = RTE_PKTMBUF_HEADROOM;
>  -    rxm->pkt_len = (uint16_t)(rte_le_to_cpu_16(rxd.rx.pkt_len)) 
>  -
>  -    rxq->crc_len;
>  +    rxm->pkt_len = rte_le_to_cpu_16(rxd.rx.pkt_len);
> 
>  In the previous code above, the 'pkt_len' is set to the length 
>  obtained
>  from the BD. the length obtained from the BD already contains CRC 
>  length.
>  But as you said above, the DPDK requires that the length of the mbuf
>  does not contain CRC length . So we subtract 'rxq->crc_len' from
>  mbuf'pkt_len' and 'data_len'. This patch doesn't change the logic, it
>  just moves the code around.
>   
> >>>
> >>> Nope, I am not saying mbuf length shouldn't contain CRC length, indeed
> >>> it is other way around and this is our confusion.
> >>>
> >>> CRC length shouldn't be in the statistics, I mean in received bytes 
> >>> stats.
> >>> Assume that received packet is 128 bytes and we know it has the CRC,
> >>> Rx received bytes stat should be 124 (rx_bytes = 128 - CRC = 124)
> >>>
> >>> But mbuf->data_len & mbuf->pkt_len should have full frame length,
> >>> including CRC.
> >>>
> >>> As application explicitly requested to KEEP CRC, it will know last 4
> >>> bytes are CRC.
> >>> Anything after 'mbuf->data_len' in the mbuf buffer is not valid, so if
> >>> you reduce 'mbuf->data_len' by CRC size, application can't know if 4
> >>> b

RE: [PATCH] net/hns3: fix Rx packet truncation when KEEP CRC enabled

2024-06-02 Thread Morten Brørup
> IMHO the only sane thing is:
>   -if keep crc is enabled then pkt_len and data_len include the extra
> bytes for the CRC.

Agree.
Consider a segmented packet where only the CRC is in the last segment.
The length of that last segment would be zero if the CRC length was not 
included in pkt_len and data_len.
Or, if only the last two bytes of the CRC were in the last segment, the length
of that last segment would be -2 if the CRC length were not included.

>   -if keep crc is disabled, then pkt_len and data_len match the length
> of the packet without the CRC.



Re: [Help] O-RAN Fronthaul CUS-U data structure implementation

2024-06-02 Thread Mattia Milani

Hi Lincoln,

Thank you very much for the feedback, I'll look into the documentation 
you provided.
In the meantime, may I ask if there is a publicly available recording of 
the training session you mentioned (or of similar training sessions done 
in the past)?


Thank you,
Mattia

On 31/05/2024 14:40, Lincoln Lavoie wrote:





Hi Mattia,

The code is being used, and there are some patches in flight, but they are 
currently coming from the OpenAirInterface folks. There's a lot of 
documentation here: 
https://gitlab.eurecom.fr/oai/openairinterface5g/-/blob/develop/doc/ORAN_FHI7.2_Tutorial.md?ref_type=heads


We are actually running this in the lab and even hosted a 
training session on the O-RAN aspects of the setup yesterday with 
Northeastern and the OAI / VIAVI teams. Point is, it's not really 
completely dead, just not a lot of movement in that one repo.


On the hardware, it has been run on both Nvidia and Intel NICs, but 
there are some requirements on things like PTP time stamping, but that 
comes with the O-RAN fronthaul requirements.


For the VLAN header, that is handled when the VFs are created and 
"handed" to the xran processes that get embedded into the DU (at least 
in the OAI implementation).


Cheers,
Lincoln


On Wed, May 29, 2024 at 4:04 AM Mattia Milani 
 wrote:


Dear Lincoln,

Thank you very much for providing this reference.

I've to admit that I didn't know about this library but I'm
looking into it
but I have some questions/concerns.

This library seems to have been abandoned for two years; at least, this is
what I see from both the documentation and their version control system:
https://gerrit.o-ran-sc.org/r/gitweb?p=o-du%2Fphy.git;a=summary
Do you think it's still reliable?

The assumptions on the HW requirements of this library, listed here,
seem to be quite restrictive:

https://docs.o-ran-sc.org/projects/o-ran-sc-o-du-phy/en/latest/Assumptions_Dependencies.html#requirements

In my experiments I'm just exploring virtual interfaces and my own HW;
I would like to avoid those constraints.

From what I can see, some of the data structures already provided by DPDK
are re-defined by this library, like the Ethernet header data structure
and the eCPRI header.
I don't know if this is for implementation reasons or because it was
built with an old version of DPDK.
But it does so without taking endianness into consideration, as
rte_ecpri.h already does in DPDK, and without supporting some message
types (line 94 of file xran_pkt.h).

Digging into the source code a bit, I found some of the data structures
I'm referring to in my original message, i.e. the eCPRI Seq ID. The
following code puzzles me, source file at:

https://gerrit.o-ran-sc.org/r/gitweb?p=o-du/phy.git;a=blob;f=fhi_lib/lib/api/xran_pkt.h;h=314b8d6b7d08f153369de5ad535702f50a574a35;hb=HEAD
line 228:
struct xran_ecpri_hdr
{
    union xran_ecpri_cmn_hdr cmnhdr;
    rte_be16_t ecpri_xtc_id;          /**< 3.1.3.1.6 real time control data / IQ data transfer message series identifier */
    union ecpri_seq_id ecpri_seq_id;  /**< 3.1.3.1.7 message identifier */
} __rte_packed;

It seems strange to me that ecpri_xtc_id is not a data structure of
its own, providing access to the DU port, band sector, etc.
Also, the 'xran_pkt_comm_hdr' data structure at line 336 assumes
that the packet doesn't have a VLAN header, but I don't know if this
is managed elsewhere.
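
As a rough sketch (not taken from the library, and assuming the usual
O-RAN eAxC decomposition into DU port / band sector / CC / RU port with
configurable field widths), such a structure could look something like
this; the 4/4/4/4 bit split and the field names are illustrative only,
and bit-field ordering/endianness would still need care:

    struct ecpri_eaxc_id_example {
        uint16_t ru_port_id     : 4; /* RU port */
        uint16_t cc_id          : 4; /* component carrier */
        uint16_t band_sector_id : 4; /* band sector */
        uint16_t cu_port_id     : 4; /* DU/CU port */
    };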

Given all that, I still think it could be useful to have those kinds of
headers directly in DPDK, but I'm open to reconsidering my statement;
please let me know if you have more information regarding this library.

Best regards,
Mattia


On 28/05/2024 16:37, Lincoln Lavoie wrote:




Hi Mattia,

Have you looked into the O-RAN OSC open fronthaul phy
implementation?

https://docs.o-ran-sc.org/projects/o-ran-sc-o-du-phy/en/latest/Architecture-Overview_fh.html

Cheers,
Lincoln

On Tue, May 28, 2024 at 10:31 AM Mattia Milani
 wrote: