[PATCH] ethdev: fix 32-bit build with GCC-13

2023-11-01 Thread Ruifeng Wang
aarch32 build with gcc-13.0.1 generated following warning:

In function 'memcpy',
inlined from 'rte_memcpy' at ../lib/eal/arm/include/rte_memcpy_32.h:296:9,
inlined from 'rte_flow_conv_action_conf' at ../lib/ethdev/rte_flow.c:726:20,
inlined from 'rte_flow_conv_actions' at ../lib/ethdev/rte_flow.c:936:10:
warning: '__builtin_memcpy' specified bound 4294967264 exceeds maximum object 
size 2147483647 [-Wstringop-overflow=]

The issue is due to possible wrapping in unsigned arithmetic.
The 'size' can be 0. 'off' is 32. When 'tmp' is equal to (unsigned)-32,
the copy length is more than half the address space. Hence the warning.

Casted variables to 64-bit to avoid wrapping.

Fixes: 063911ee1df4 ("ethdev: add flow API object converter")
Cc: adrien.mazarg...@6wind.com
Cc: sta...@dpdk.org

Reported-by: Luca Boccassi 
Signed-off-by: Ruifeng Wang 
---
 lib/ethdev/rte_flow.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/ethdev/rte_flow.c b/lib/ethdev/rte_flow.c
index 3a67f1aaba..2a5a057195 100644
--- a/lib/ethdev/rte_flow.c
+++ b/lib/ethdev/rte_flow.c
@@ -722,7 +722,7 @@ rte_flow_conv_action_conf(void *buf, const size_t size,
if (src.rss->key_len && src.rss->key) {
off = RTE_ALIGN_CEIL(off, sizeof(*dst.rss->key));
tmp = sizeof(*src.rss->key) * src.rss->key_len;
-   if (size >= off + tmp)
+   if (size >= (uint64_t)off + (uint64_t)tmp)
dst.rss->key = rte_memcpy
((void *)((uintptr_t)dst.rss + off),
 src.rss->key, tmp);
@@ -731,7 +731,7 @@ rte_flow_conv_action_conf(void *buf, const size_t size,
if (src.rss->queue_num) {
off = RTE_ALIGN_CEIL(off, sizeof(*dst.rss->queue));
tmp = sizeof(*src.rss->queue) * src.rss->queue_num;
-   if (size >= off + tmp)
+   if (size >= (uint64_t)off + (uint64_t)tmp)
dst.rss->queue = rte_memcpy
((void *)((uintptr_t)dst.rss + off),
 src.rss->queue, tmp);
-- 
2.25.1



[PATCH] eal: stop iteration after lcore info is processed

2023-11-01 Thread Ruifeng Wang
Telemetry iterates on lcore ID to collect info of a specific lcore.
Since only one lcore is processed at a time, the iteration can stop
when a matching lcore is found.

Fixes: f2b852d909f9 ("eal: add lcore info in telemetry")
Cc: rja...@redhat.com
Cc: sta...@dpdk.org

Signed-off-by: Ruifeng Wang 
---
 lib/eal/common/eal_common_lcore.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/eal/common/eal_common_lcore.c 
b/lib/eal/common/eal_common_lcore.c
index ceda714ca5..0d6812ec75 100644
--- a/lib/eal/common/eal_common_lcore.c
+++ b/lib/eal/common/eal_common_lcore.c
@@ -546,7 +546,8 @@ lcore_telemetry_info_cb(unsigned int lcore_id, void *arg)
rte_tel_data_add_dict_uint(info->d, "busy_cycles", 
usage.busy_cycles);
}
 
-   return 0;
+   /* Return non-zero positive value to stop iterating over lcore_id. */
+   return 1;
 }
 
 static int
-- 
2.25.1



[Bug 1108] net/i40e completely ignores flow rule transfer attribute prio to 22.11-rc1

2023-11-01 Thread bugzilla
https://bugs.dpdk.org/show_bug.cgi?id=1108

dengkaiwen (kaiwenx.d...@intel.com) changed:

   What|Removed |Added

 Status|CONFIRMED   |RESOLVED
 CC||kaiwenx.d...@intel.com
 Resolution|--- |FIXED

--- Comment #7 from dengkaiwen (kaiwenx.d...@intel.com) ---
Hi All,

I'm going to close this ticket for now, so please contact me if you still have
questions.

Thanks
Kaiwen Deng

-- 
You are receiving this mail because:
You are the assignee for the bug.

[Bug 1109] net/ice completely ignores flow rule transfer attribute prio to 22.11-rc1

2023-11-01 Thread bugzilla
https://bugs.dpdk.org/show_bug.cgi?id=1109

dengkaiwen (kaiwenx.d...@intel.com) changed:

   What|Removed |Added

 CC||kaiwenx.d...@intel.com
 Resolution|--- |FIXED
 Status|UNCONFIRMED |RESOLVED

--- Comment #4 from dengkaiwen (kaiwenx.d...@intel.com) ---
Hi All,

I'm going to close this ticket for now, so please contact me if you still have
questions.

Thanks
Kaiwen Deng

-- 
You are receiving this mail because:
You are the assignee for the bug.

[PATCH] net/enic: avoid extra unlock when setting MTU in enic

2023-11-01 Thread Weiguo Li
The 'set_mtu_done' goto statement is being executed in a context
where the 'mtu_lock' has not been previously locked.

To avoid the extra unlocking operation, replace the goto statement
with a return statement.

Fixes: c3e09182bcd6 ("net/enic: support scatter Rx in MTU update")
Cc: sta...@dpdk.org

Signed-off-by: Weiguo Li 
---
 .mailmap | 2 +-
 drivers/net/enic/enic_main.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.mailmap b/.mailmap
index 3f5bab26a8..b4f0ae26b8 100644
--- a/.mailmap
+++ b/.mailmap
@@ -1500,7 +1500,7 @@ Waterman Cao 
 Weichun Chen 
 Wei Dai 
 Weifeng Li 
-Weiguo Li 
+Weiguo Li  
 Wei Huang 
 Wei Hu 
 Wei Hu (Xavier) 
diff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c
index 19a99a82c5..a6aaa760ca 100644
--- a/drivers/net/enic/enic_main.c
+++ b/drivers/net/enic/enic_main.c
@@ -1639,7 +1639,7 @@ int enic_set_mtu(struct enic *enic, uint16_t new_mtu)
 * packet length.
 */
if (!eth_dev->data->dev_started)
-   goto set_mtu_done;
+   return rc;
 
/*
 * The device has started, re-do RQs on the fly. In the process, we
-- 
2.34.1



RE: [PATCH v3 0/3] add pointer compression API

2023-11-01 Thread Morten Brørup
> From: Paul Szczepanek [mailto:paul.szczepa...@arm.com]
> Sent: Tuesday, 31 October 2023 19.11

[...]

> Test is added that shows potential performance gain from compression.
> In
> this test an array of pointers is passed through a ring between two
> cores.
> It shows the gain which is dependent on the bulk operation size. In
> this
> synthetic test run on ampere altra a substantial (up to 25%)
> performance
> gain is seen if done in bulk size larger than 32. At 32 it breaks even
> and
> lower sizes create a small (less than 5%) slowdown due to overhead.
> 
> In a more realistic mock application running the l3 forwarding dpdk
> example that works in pipeline mode this translated into a ~5%
> throughput
> increase on an ampere altra.

What was the bulk size in this test?

And were the pipeline stages running on the same lcore or individual lcores per 
pipeline stage?



[PATCH v8 03/10] ethdev: support setting and querying RSS algorithm

2023-11-01 Thread Jie Hai
Currently, rte_eth_rss_conf supports configuring and querying
RSS hash functions, rss key and it's length, but not RSS hash
algorithm.

The structure ``rte_eth_dev_info`` is extended by adding a new
field "rss_algo_capa". Drivers are responsible for reporting this
capa and configurations of RSS hash algorithm can be verified based
on the capability. The default value of "rss_algo_capa" is
RTE_ETH_HASH_ALGO_CAPA_MASK(DEFAULT) if drivers do not report it.

The structure ``rte_eth_rss_conf`` is extended by adding a new
field "algorithm". This represents the RSS algorithms to apply.
If the value of "algorithm" used for configuration is a gibberish
value, drivers should report the error.

To check whether the drivers report valid "algorithm", it is set
to default value before querying in rte_eth_dev_rss_hash_conf_get().

Signed-off-by: Jie Hai 
Signed-off-by: Dongdong Liu 
Acked-by: Huisong Li 
---
 doc/guides/rel_notes/release_23_11.rst |  5 +
 lib/ethdev/rte_ethdev.c| 26 +++
 lib/ethdev/rte_ethdev.h| 29 ++
 lib/ethdev/rte_flow.c  |  1 -
 lib/ethdev/rte_flow.h  | 26 ++-
 5 files changed, 62 insertions(+), 25 deletions(-)

diff --git a/doc/guides/rel_notes/release_23_11.rst 
b/doc/guides/rel_notes/release_23_11.rst
index 95db98d098d8..e207786044f9 100644
--- a/doc/guides/rel_notes/release_23_11.rst
+++ b/doc/guides/rel_notes/release_23_11.rst
@@ -372,6 +372,11 @@ ABI Changes
 * security: struct ``rte_security_ipsec_sa_options`` was updated
   due to inline out-of-place feature addition.
 
+* ethdev: Added "rss_algo_capa" field to ``rte_eth_dev_info`` structure for
+* reporting RSS hash algorithm capability.
+
+* ethdev: Added "algorithm" field to ``rte_eth_rss_conf`` structure for RSS
+  hash algorithm.
 
 Known Issues
 
diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index 07bb35833ba6..f9bd99d07eb1 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -1269,6 +1269,7 @@ int
 rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
  const struct rte_eth_conf *dev_conf)
 {
+   enum rte_eth_hash_function algorithm;
struct rte_eth_dev *dev;
struct rte_eth_dev_info dev_info;
struct rte_eth_conf orig_conf;
@@ -1510,6 +1511,18 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t 
nb_rx_q, uint16_t nb_tx_q,
goto rollback;
}
 
+   algorithm = dev_conf->rx_adv_conf.rss_conf.algorithm;
+   if (RTE_ETH_HASH_ALGO_TO_CAPA(algorithm) == 0 ||
+   (dev_info.rss_algo_capa &
+RTE_ETH_HASH_ALGO_TO_CAPA(algorithm)) == 0) {
+   RTE_ETHDEV_LOG(ERR,
+   "Ethdev port_id=%u configured RSS hash algorithm (%u)"
+   "is not in the algorithm capability (0x%" PRIx32 ")\n",
+   port_id, algorithm, dev_info.rss_algo_capa);
+   ret = -EINVAL;
+   goto rollback;
+   }
+
/*
 * Setup new number of Rx/Tx queues and reconfigure device.
 */
@@ -3767,6 +3780,7 @@ rte_eth_dev_info_get(uint16_t port_id, struct 
rte_eth_dev_info *dev_info)
dev_info->min_mtu = RTE_ETHER_MIN_LEN - RTE_ETHER_HDR_LEN -
RTE_ETHER_CRC_LEN;
dev_info->max_mtu = UINT16_MAX;
+   dev_info->rss_algo_capa = RTE_ETH_HASH_ALGO_CAPA_MASK(DEFAULT);
 
if (*dev->dev_ops->dev_infos_get == NULL)
return -ENOTSUP;
@@ -4716,6 +4730,16 @@ rte_eth_dev_rss_hash_update(uint16_t port_id,
return -EINVAL;
}
 
+   if (RTE_ETH_HASH_ALGO_TO_CAPA(rss_conf->algorithm) == 0 ||
+   (dev_info.rss_algo_capa &
+RTE_ETH_HASH_ALGO_TO_CAPA(rss_conf->algorithm)) == 0) {
+   RTE_ETHDEV_LOG(ERR,
+   "Ethdev port_id=%u configured RSS hash algorithm (%u)"
+   "is not in the algorithm capability (0x%" PRIx32 ")\n",
+   port_id, rss_conf->algorithm, dev_info.rss_algo_capa);
+   return -EINVAL;
+   }
+
if (*dev->dev_ops->rss_hash_update == NULL)
return -ENOTSUP;
ret = eth_err(port_id, (*dev->dev_ops->rss_hash_update)(dev,
@@ -4756,6 +4780,8 @@ rte_eth_dev_rss_hash_conf_get(uint16_t port_id,
return -EINVAL;
}
 
+   rss_conf->algorithm = RTE_ETH_HASH_FUNCTION_DEFAULT;
+
if (*dev->dev_ops->rss_hash_conf_get == NULL)
return -ENOTSUP;
ret = eth_err(port_id, (*dev->dev_ops->rss_hash_conf_get)(dev,
diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
index 343a134fdd12..76c45bd759e4 100644
--- a/lib/ethdev/rte_ethdev.h
+++ b/lib/ethdev/rte_ethdev.h
@@ -445,6 +445,33 @@ struct rte_vlan_filter_conf {
uint64_t ids[64];
 };
 
+/**
+ * Hash function types.
+ */
+enum rte_eth_hash_function {
+   /** DEFAULT means drive

[PATCH v8 00/10] support setting and querying RSS algorithms

2023-11-01 Thread Jie Hai
This patchset is to support setting and querying RSS algorithms.
For this purpose, field "rss_algo_capa" is added to ``rte_eth_dev_info``
and field "algorithm" is added to ``rte_eth_rss_conf``.
The drivers should reports their "rss_algo_capa" if they support
updating RSS algorithms. Otherwise, the "rss_algo_capa" is set to
RTE_ETH_HASH_ALGO_CAPA_MASK(DEFAULT). The app configures RSS algorithms
by field "algorithm" and the related API should verify "algorithm" with
"rss_algo_capa".

--
v8:
1. rewrite some comments.
2. add check for rss_key_len in ethdev level.
3. add Acked-by: Huisong Li .
4. fix log on RSS hash algorithm.
5. add rte_eth_dev_rss_algo_name to lib/ethdev/version.map.
6. fix RSS algorithm display on testpmd.

v7:
1. fix compile error.
2. add signed-off-by to patch[4/9].
v6:
1. rewrite some comments.
2. add "rss_algo_capa" for `rte_eth_dev_info``.
3. add new API to get name of RSS algorithms

v5:
1. rewrite some comments.
2. check RSS algorithm for drivers supporting RSS.
3. change field "func" of rss_conf to "algorithm".
4. fix commit log for [PATCH v4 4/7].
5. add Acked-by Reshma Pattan.
6. add symmetric_toeplitz_sort for showing.
7. change "hf" to "hash function" for showing.

v4:
1. recomment some definitions related to RSS.
2. allocate static memory for rss_key instead of dynamic.
3. use array of strings to get the name of rss algorithm.
4. add display of rss algorithm with testpmd.

v3:
1. fix commit log for PATCH [1/5].
2. make RSS ABI changes description to start the actual text at the margin.
3. move defnition of enum rte_eth_hash_function to rte_ethdev.h.
4. fix some comment codes.

v2:
1. return error if "func" is invalid.
2. modify the comments of the "func" field.
3. modify commit log of patch [3/5].
4. use malloc instead of rte_malloc.
5. adjust display format of RSS info.
6. remove the string display of rss_hf.

Huisong Li (1):
  net/hns3: support setting and querying RSS hash function

Jie Hai (9):
  ethdev: overwrite some comment related to RSS
  lib/ethdev: check RSS key length
  ethdev: support setting and querying RSS algorithm
  net/hns3: report RSS hash algorithms capability
  app/proc-info: fix never show RSS info
  app/proc-info: adjust the display format of RSS info
  ethdev: add API to get RSS algorithm names
  app/proc-info: support querying RSS hash algorithm
  app/testpmd: add RSS hash algorithms display

 app/proc-info/main.c   | 24 +---
 app/test-pmd/cmdline.c | 29 +++--
 app/test-pmd/config.c  | 29 -
 app/test-pmd/testpmd.h |  2 +-
 doc/guides/rel_notes/release_23_11.rst |  9 +++
 drivers/net/hns3/hns3_common.c |  4 ++
 drivers/net/hns3/hns3_rss.c| 47 ---
 lib/ethdev/rte_ethdev.c| 82 ++
 lib/ethdev/rte_ethdev.h| 79 -
 lib/ethdev/rte_flow.c  |  1 -
 lib/ethdev/rte_flow.h  | 25 +---
 lib/ethdev/version.map |  1 +
 12 files changed, 239 insertions(+), 93 deletions(-)

-- 
2.30.0



[PATCH v8 04/10] net/hns3: report RSS hash algorithms capability

2023-11-01 Thread Jie Hai
The hns3 driver should reports RSS hash algorithm capability
to support updating RSS hash algorithm by
rte_eth_dev_rss_hash_update() or rte_eth_dev_configure().

Signed-off-by: Jie Hai 
Acked-by: Huisong Li 
---
 drivers/net/hns3/hns3_common.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/hns3/hns3_common.c b/drivers/net/hns3/hns3_common.c
index c4d47f43fe44..7f5067ea2fa2 100644
--- a/drivers/net/hns3/hns3_common.c
+++ b/drivers/net/hns3/hns3_common.c
@@ -132,6 +132,10 @@ hns3_dev_infos_get(struct rte_eth_dev *eth_dev, struct 
rte_eth_dev_info *info)
info->reta_size = hw->rss_ind_tbl_size;
info->hash_key_size = hw->rss_key_size;
info->flow_type_rss_offloads = HNS3_ETH_RSS_SUPPORT;
+   info->rss_algo_capa = RTE_ETH_HASH_ALGO_CAPA_MASK(DEFAULT) |
+ RTE_ETH_HASH_ALGO_CAPA_MASK(TOEPLITZ) |
+ RTE_ETH_HASH_ALGO_CAPA_MASK(SIMPLE_XOR) |
+ RTE_ETH_HASH_ALGO_CAPA_MASK(SYMMETRIC_TOEPLITZ);
 
info->default_rxportconf.burst_size = HNS3_DEFAULT_PORT_CONF_BURST_SIZE;
info->default_txportconf.burst_size = HNS3_DEFAULT_PORT_CONF_BURST_SIZE;
-- 
2.30.0



[PATCH v8 02/10] lib/ethdev: check RSS key length

2023-11-01 Thread Jie Hai
In rte_eth_dev_rss_hash_conf_get(), the "rss_key_len" should be
greater than or equal to the "hash_key_size" which get from
rte_eth_dev_info_get() API. And the "rss_key" should contain at
least "hash_key_size" bytes. If these requirements are not met,
the query unreliable.

In rte_eth_dev_rss_hash_update() or rte_eth_dev_configure(), the
"rss_key_len" indicates the length of the "rss_key" in bytes of
the array pointed by "rss_key", it should be equal to the
"hash_key_size" if "rss_key" is not NULL.

This patch checks "rss_key_len" in ethdev level.

Signed-off-by: Jie Hai 
---
 lib/ethdev/rte_ethdev.c | 31 +++
 1 file changed, 31 insertions(+)

diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index af23ac0ad00f..07bb35833ba6 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -1500,6 +1500,16 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t 
nb_rx_q, uint16_t nb_tx_q,
goto rollback;
}
 
+   if (dev_conf->rx_adv_conf.rss_conf.rss_key != NULL &&
+   dev_conf->rx_adv_conf.rss_conf.rss_key_len < 
dev_info.hash_key_size) {
+   RTE_ETHDEV_LOG(ERR,
+   "Ethdev port_id=%u invalid RSS key len: %u, valid 
value: %u\n",
+   port_id, dev_conf->rx_adv_conf.rss_conf.rss_key_len,
+   dev_info.hash_key_size);
+   ret = -EINVAL;
+   goto rollback;
+   }
+
/*
 * Setup new number of Rx/Tx queues and reconfigure device.
 */
@@ -4698,6 +4708,14 @@ rte_eth_dev_rss_hash_update(uint16_t port_id,
return -ENOTSUP;
}
 
+   if (rss_conf->rss_key != NULL &&
+   rss_conf->rss_key_len != dev_info.hash_key_size) {
+   RTE_ETHDEV_LOG(ERR,
+   "Ethdev port_id=%u invalid RSS key len: %u, valid 
value: %u\n",
+   port_id, rss_conf->rss_key_len, dev_info.hash_key_size);
+   return -EINVAL;
+   }
+
if (*dev->dev_ops->rss_hash_update == NULL)
return -ENOTSUP;
ret = eth_err(port_id, (*dev->dev_ops->rss_hash_update)(dev,
@@ -4712,6 +4730,7 @@ int
 rte_eth_dev_rss_hash_conf_get(uint16_t port_id,
  struct rte_eth_rss_conf *rss_conf)
 {
+   struct rte_eth_dev_info dev_info = { 0 };
struct rte_eth_dev *dev;
int ret;
 
@@ -4725,6 +4744,18 @@ rte_eth_dev_rss_hash_conf_get(uint16_t port_id,
return -EINVAL;
}
 
+   ret = rte_eth_dev_info_get(port_id, &dev_info);
+   if (ret != 0)
+   return ret;
+
+   if (rss_conf->rss_key != NULL &&
+   rss_conf->rss_key_len < dev_info.hash_key_size) {
+   RTE_ETHDEV_LOG(ERR,
+   "Ethdev port_id=%u invalid RSS key len: %u, should not 
be less than: %u\n",
+   port_id, rss_conf->rss_key_len, dev_info.hash_key_size);
+   return -EINVAL;
+   }
+
if (*dev->dev_ops->rss_hash_conf_get == NULL)
return -ENOTSUP;
ret = eth_err(port_id, (*dev->dev_ops->rss_hash_conf_get)(dev,
-- 
2.30.0



[PATCH v8 05/10] net/hns3: support setting and querying RSS hash function

2023-11-01 Thread Jie Hai
From: Huisong Li 

Support setting and querying RSS hash function by ethdev ops.

Signed-off-by: Huisong Li 
Signed-off-by: Dongdong Liu 
Signed-off-by: Jie Hai 
Acked-by: Chengwen Feng 
---
 drivers/net/hns3/hns3_rss.c | 47 +
 1 file changed, 27 insertions(+), 20 deletions(-)

diff --git a/drivers/net/hns3/hns3_rss.c b/drivers/net/hns3/hns3_rss.c
index 6126512bd780..010a759f23d9 100644
--- a/drivers/net/hns3/hns3_rss.c
+++ b/drivers/net/hns3/hns3_rss.c
@@ -646,14 +646,14 @@ hns3_dev_rss_hash_update(struct rte_eth_dev *dev,
if (ret)
goto set_tuple_fail;
 
-   if (key) {
-   ret = hns3_rss_set_algo_key(hw, hw->rss_info.hash_algo,
-   key, hw->rss_key_size);
-   if (ret)
-   goto set_algo_key_fail;
-   /* Update the shadow RSS key with user specified */
+   ret = hns3_update_rss_algo_key(hw, rss_conf->algorithm, key, key_len);
+   if (ret != 0)
+   goto set_algo_key_fail;
+
+   if (rss_conf->algorithm != RTE_ETH_HASH_FUNCTION_DEFAULT)
+   hw->rss_info.hash_algo = 
hns3_hash_func_map[rss_conf->algorithm];
+   if (key != NULL)
memcpy(hw->rss_info.key, key, hw->rss_key_size);
-   }
hw->rss_info.rss_hf = rss_hf;
rte_spinlock_unlock(&hw->lock);
 
@@ -769,7 +769,13 @@ int
 hns3_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
   struct rte_eth_rss_conf *rss_conf)
 {
+   const uint8_t hash_func_map[] = {
+   [HNS3_RSS_HASH_ALGO_TOEPLITZ] = RTE_ETH_HASH_FUNCTION_TOEPLITZ,
+   [HNS3_RSS_HASH_ALGO_SIMPLE] = RTE_ETH_HASH_FUNCTION_SIMPLE_XOR,
+   [HNS3_RSS_HASH_ALGO_SYMMETRIC_TOEP] = 
RTE_ETH_HASH_FUNCTION_SYMMETRIC_TOEPLITZ,
+   };
struct hns3_adapter *hns = dev->data->dev_private;
+   uint8_t rss_key[HNS3_RSS_KEY_SIZE_MAX] = {0};
struct hns3_hw *hw = &hns->hw;
uint8_t hash_algo;
int ret;
@@ -777,26 +783,27 @@ hns3_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
rte_spinlock_lock(&hw->lock);
ret = hns3_rss_hash_get_rss_hf(hw, &rss_conf->rss_hf);
if (ret != 0) {
+   rte_spinlock_unlock(&hw->lock);
hns3_err(hw, "obtain hash tuples failed, ret = %d", ret);
-   goto out;
+   return ret;
+   }
+
+   ret = hns3_rss_get_algo_key(hw, &hash_algo, rss_key, hw->rss_key_size);
+   if (ret != 0) {
+   rte_spinlock_unlock(&hw->lock);
+   hns3_err(hw, "obtain hash algo and key failed, ret = %d", ret);
+   return ret;
}
+   rte_spinlock_unlock(&hw->lock);
 
-   /* Get the RSS Key required by the user */
+   /* Get the RSS Key if user required. */
if (rss_conf->rss_key && rss_conf->rss_key_len >= hw->rss_key_size) {
-   ret = hns3_rss_get_algo_key(hw, &hash_algo, rss_conf->rss_key,
-   hw->rss_key_size);
-   if (ret != 0) {
-   hns3_err(hw, "obtain hash algo and key failed, ret = 
%d",
-ret);
-   goto out;
-   }
+   memcpy(rss_conf->rss_key, rss_key, hw->rss_key_size);
rss_conf->rss_key_len = hw->rss_key_size;
}
+   rss_conf->algorithm = hash_func_map[hash_algo];
 
-out:
-   rte_spinlock_unlock(&hw->lock);
-
-   return ret;
+   return 0;
 }
 
 /*
-- 
2.30.0



[PATCH v8 07/10] app/proc-info: adjust the display format of RSS info

2023-11-01 Thread Jie Hai
This patch splits the length and value of RSS key into two parts,
removes spaces between RSS keys, and adds line breaks between RSS
key and RSS hf.

Before the adjustment, RSS info is shown as:
  - RSS
  -- RSS len 40 key (hex): 6d 5a 56 da 25 5b e c2 41 67 \
 25 3d 43 a3 8f b0 d0 ca 2b cb ae 7b 30 b4 77 cb 2d \
 a3 80 30 f2 c 6a 42 b7 3b be ac 1 fa -- hf 0x0
and after:
  - RSS info
  -- key len : 40
  -- key (hex) : 6d5a56da255b0ec24167253d43a38fb0d0c \
a2bcbae7b30b477cb2da38030f20c6a42b73bbeac01fa
  -- hash function : 0x0

Fixes: 8a37f37fc243 ("app/procinfo: add --show-port")
Cc: sta...@dpdk.org

Signed-off-by: Jie Hai 
Signed-off-by: Dongdong Liu 
Acked-by: Reshma Pattan
---
 app/proc-info/main.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/app/proc-info/main.c b/app/proc-info/main.c
index 3a441ba07586..4c577fa417fd 100644
--- a/app/proc-info/main.c
+++ b/app/proc-info/main.c
@@ -1176,12 +1176,13 @@ show_port(void)
rss_conf.rss_key_len = dev_info.hash_key_size;
ret = rte_eth_dev_rss_hash_conf_get(i, &rss_conf);
if (ret == 0) {
-   printf("  - RSS\n");
-   printf("\t  -- RSS len %u key (hex):",
+   printf("  - RSS info\n");
+   printf("\t  -- key len : %u\n",
rss_conf.rss_key_len);
+   printf("\t  -- key (hex) : ");
for (k = 0; k < rss_conf.rss_key_len; k++)
-   printf(" %x", rss_conf.rss_key[k]);
-   printf("\t  -- hf 0x%"PRIx64"\n",
+   printf("%02x", rss_conf.rss_key[k]);
+   printf("\n\t  -- hash function : 0x%"PRIx64"\n",
rss_conf.rss_hf);
}
 
-- 
2.30.0



[PATCH v8 06/10] app/proc-info: fix never show RSS info

2023-11-01 Thread Jie Hai
Command show-port should show RSS info (rss_key, len and rss_hf),
However, the information is shown only when rss_conf.rss_key is not
NULL. Since no memory is allocated for rss_conf.rss_key, rss_key
will always be NULL and the rss_info will never show. This patch
fixes it.

Fixes: 8a37f37fc243 ("app/procinfo: add --show-port")
Cc: sta...@dpdk.org

Signed-off-by: Jie Hai 
Signed-off-by: Dongdong Liu 
Acked-by: Reshma Pattan 
Acked-by: Chengwen Feng 
Acked-by: Huisong Li 
---
 app/proc-info/main.c | 21 -
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/app/proc-info/main.c b/app/proc-info/main.c
index ce53bc30dfec..3a441ba07586 100644
--- a/app/proc-info/main.c
+++ b/app/proc-info/main.c
@@ -151,6 +151,8 @@ struct desc_param {
 static struct desc_param rx_desc_param;
 static struct desc_param tx_desc_param;
 
+#define RSS_HASH_KEY_SIZE 64
+
 /* display usage */
 static void
 proc_info_usage(const char *prgname)
@@ -1011,6 +1013,7 @@ show_port(void)
struct rte_eth_fc_conf fc_conf;
struct rte_ether_addr mac;
struct rte_eth_dev_owner owner;
+   uint8_t rss_key[RSS_HASH_KEY_SIZE];
 
/* Skip if port is not in mask */
if ((enabled_port_mask & (1ul << i)) == 0)
@@ -1169,17 +1172,17 @@ show_port(void)
printf("\n");
}
 
+   rss_conf.rss_key = rss_key;
+   rss_conf.rss_key_len = dev_info.hash_key_size;
ret = rte_eth_dev_rss_hash_conf_get(i, &rss_conf);
if (ret == 0) {
-   if (rss_conf.rss_key) {
-   printf("  - RSS\n");
-   printf("\t  -- RSS len %u key (hex):",
-   rss_conf.rss_key_len);
-   for (k = 0; k < rss_conf.rss_key_len; k++)
-   printf(" %x", rss_conf.rss_key[k]);
-   printf("\t  -- hf 0x%"PRIx64"\n",
-   rss_conf.rss_hf);
-   }
+   printf("  - RSS\n");
+   printf("\t  -- RSS len %u key (hex):",
+   rss_conf.rss_key_len);
+   for (k = 0; k < rss_conf.rss_key_len; k++)
+   printf(" %x", rss_conf.rss_key[k]);
+   printf("\t  -- hf 0x%"PRIx64"\n",
+   rss_conf.rss_hf);
}
 
 #ifdef RTE_LIB_SECURITY
-- 
2.30.0



[PATCH v8 08/10] ethdev: add API to get RSS algorithm names

2023-11-01 Thread Jie Hai
This patch adds new API rte_eth_dev_rss_algo_name() to get
name of a RSS algorithm and document it.

Signed-off-by: Jie Hai 
Acked-by: Huisong Li 
---
 doc/guides/rel_notes/release_23_11.rst |  4 
 lib/ethdev/rte_ethdev.c| 25 +
 lib/ethdev/rte_ethdev.h| 16 
 lib/ethdev/version.map |  1 +
 4 files changed, 46 insertions(+)

diff --git a/doc/guides/rel_notes/release_23_11.rst 
b/doc/guides/rel_notes/release_23_11.rst
index e207786044f9..5276d302c40b 100644
--- a/doc/guides/rel_notes/release_23_11.rst
+++ b/doc/guides/rel_notes/release_23_11.rst
@@ -122,6 +122,10 @@ New Features
   a group's miss actions, which are the actions to be performed on packets
   that didn't match any of the flow rules in the group.
 
+* **Added new API for RSS hash algorithm**
+  Added new function ``rte_eth_dev_rss_algo_name`` to get name of RSS hash
+  algorithm.
+
 * **Updated Amazon ena (Elastic Network Adapter) net driver.**
 
   * Upgraded ENA HAL to latest version.
diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index f9bd99d07eb1..cd19ea89e1f9 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -160,6 +160,17 @@ enum {
STAT_QMAP_RX
 };
 
+static const struct {
+   enum rte_eth_hash_function algo;
+   const char *name;
+} rte_eth_dev_rss_algo_names[] = {
+   {RTE_ETH_HASH_FUNCTION_DEFAULT, "default"},
+   {RTE_ETH_HASH_FUNCTION_SIMPLE_XOR, "simple_xor"},
+   {RTE_ETH_HASH_FUNCTION_TOEPLITZ, "toeplitz"},
+   {RTE_ETH_HASH_FUNCTION_SYMMETRIC_TOEPLITZ, "symmetric_toeplitz"},
+   {RTE_ETH_HASH_FUNCTION_SYMMETRIC_TOEPLITZ_SORT, 
"symmetric_toeplitz_sort"},
+};
+
 int
 rte_eth_iterator_init(struct rte_dev_iterator *iter, const char *devargs_str)
 {
@@ -4792,6 +4803,20 @@ rte_eth_dev_rss_hash_conf_get(uint16_t port_id,
return ret;
 }
 
+const char *
+rte_eth_dev_rss_algo_name(enum rte_eth_hash_function rss_algo)
+{
+   const char *name = "Unknown function";
+   unsigned int i;
+
+   for (i = 0; i < RTE_DIM(rte_eth_dev_rss_algo_names); i++) {
+   if (rss_algo == rte_eth_dev_rss_algo_names[i].algo)
+   return rte_eth_dev_rss_algo_names[i].name;
+   }
+
+   return name;
+}
+
 int
 rte_eth_dev_udp_tunnel_port_add(uint16_t port_id,
struct rte_eth_udp_tunnel *udp_tunnel)
diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
index 76c45bd759e4..f7339fd5adce 100644
--- a/lib/ethdev/rte_ethdev.h
+++ b/lib/ethdev/rte_ethdev.h
@@ -4644,6 +4644,22 @@ int
 rte_eth_dev_rss_hash_conf_get(uint16_t port_id,
  struct rte_eth_rss_conf *rss_conf);
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ *  Get the name of RSS hash algorithm.
+ *
+ * @param rss_algo
+ *   Hash algorithm.
+ *
+ * @return
+ *   Hash algorithm name or 'UNKNOWN' if the rss_algo cannot be recognized.
+ */
+__rte_experimental
+const char *
+rte_eth_dev_rss_algo_name(enum rte_eth_hash_function rss_algo);
+
 /**
  * Add UDP tunneling port for a type of tunnel.
  *
diff --git a/lib/ethdev/version.map b/lib/ethdev/version.map
index 919ba5b8e65b..9336522b713c 100644
--- a/lib/ethdev/version.map
+++ b/lib/ethdev/version.map
@@ -314,6 +314,7 @@ EXPERIMENTAL {
rte_flow_restore_info_dynflag;
 
# added in 23.11
+   rte_eth_dev_rss_algo_name;
rte_eth_recycle_rx_queue_info_get;
rte_flow_group_set_miss_actions;
rte_flow_calc_table_hash;
-- 
2.30.0



[PATCH v8 09/10] app/proc-info: support querying RSS hash algorithm

2023-11-01 Thread Jie Hai
Display RSS hash algorithm with command show-port as below.
  - RSS info
  -- hash algorithm : toeplitz

Signed-off-by: Jie Hai 
Signed-off-by: Dongdong Liu 
Acked-by: Reshma Pattan 
---
 app/proc-info/main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/app/proc-info/main.c b/app/proc-info/main.c
index 4c577fa417fd..b672aaefbe99 100644
--- a/app/proc-info/main.c
+++ b/app/proc-info/main.c
@@ -1184,6 +1184,8 @@ show_port(void)
printf("%02x", rss_conf.rss_key[k]);
printf("\n\t  -- hash function : 0x%"PRIx64"\n",
rss_conf.rss_hf);
+   printf("\t  -- hash algorithm : %s\n",
+   rte_eth_dev_rss_algo_name(rss_conf.algorithm));
}
 
 #ifdef RTE_LIB_SECURITY
-- 
2.30.0



[PATCH v8 01/10] ethdev: overwrite some comment related to RSS

2023-11-01 Thread Jie Hai
1. overwrite the comments of fields of 'rte_eth_rss_conf'.
2. Add comments for RTE_ETH_HASH_FUNCTION_DEFAULT.

Signed-off-by: Jie Hai 
---
 lib/ethdev/rte_ethdev.h | 34 +++---
 lib/ethdev/rte_flow.h   |  1 +
 2 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
index a53dd5a1efec..343a134fdd12 100644
--- a/lib/ethdev/rte_ethdev.h
+++ b/lib/ethdev/rte_ethdev.h
@@ -448,24 +448,28 @@ struct rte_vlan_filter_conf {
 /**
  * A structure used to configure the Receive Side Scaling (RSS) feature
  * of an Ethernet port.
- * If not NULL, the *rss_key* pointer of the *rss_conf* structure points
- * to an array holding the RSS key to use for hashing specific header
- * fields of received packets. The length of this array should be indicated
- * by *rss_key_len* below. Otherwise, a default random hash key is used by
- * the device driver.
- *
- * The *rss_key_len* field of the *rss_conf* structure indicates the length
- * in bytes of the array pointed by *rss_key*. To be compatible, this length
- * will be checked in i40e only. Others assume 40 bytes to be used as before.
- *
- * The *rss_hf* field of the *rss_conf* structure indicates the different
- * types of IPv4/IPv6 packets to which the RSS hashing must be applied.
- * Supplying an *rss_hf* equal to zero disables the RSS feature.
  */
 struct rte_eth_rss_conf {
-   uint8_t *rss_key;/**< If not NULL, 40-byte hash key. */
+   /**
+* In rte_eth_dev_rss_hash_conf_get(), the *rss_key_len* should be
+* greater than or equal to the *hash_key_size* which get from
+* rte_eth_dev_info_get() API. And the *rss_key* should contain at least
+* *hash_key_size* bytes. If not meet these requirements, the query
+* result is unreliable even if the operation returns success.
+*
+* In rte_eth_dev_rss_hash_update() or rte_eth_dev_configure(), if
+* *rss_key* is not NULL, the *rss_key_len* indicates the length of the
+* *rss_key* in bytes of the array pointed by *rss_key*, and it should
+* be equal to *hash_key_size*. Otherwise, drivers are free to use a
+* random or a default key or to ignore this configuration.
+*/
+   uint8_t *rss_key;
uint8_t rss_key_len; /**< hash key length in bytes. */
-   uint64_t rss_hf; /**< Hash functions to apply - see below. */
+   /**
+* Indicates the type of packets or the specific part of packets to
+* which RSS hashing is to be applied.
+*/
+   uint64_t rss_hf;
 };
 
 /*
diff --git a/lib/ethdev/rte_flow.h b/lib/ethdev/rte_flow.h
index c16fe8c21f2f..751c29a0f3f3 100644
--- a/lib/ethdev/rte_flow.h
+++ b/lib/ethdev/rte_flow.h
@@ -3226,6 +3226,7 @@ struct rte_flow_query_count {
  * Hash function types.
  */
 enum rte_eth_hash_function {
+   /** DEFAULT means driver decides which hash algorithm to pick. */
RTE_ETH_HASH_FUNCTION_DEFAULT = 0,
RTE_ETH_HASH_FUNCTION_TOEPLITZ, /**< Toeplitz */
RTE_ETH_HASH_FUNCTION_SIMPLE_XOR, /**< Simple XOR */
-- 
2.30.0



[PATCH v8 10/10] app/testpmd: add RSS hash algorithms display

2023-11-01 Thread Jie Hai
Add the command "show port X rss-hash algorithm" to display
the RSS hash algorithms of port X. An example is shown:

testpmd> show port 0 rss-hash algorithm
RSS algorithm:
  toeplitz

Signed-off-by: Jie Hai 
Acked-by: Huisong Li 
---
 app/test-pmd/cmdline.c | 29 -
 app/test-pmd/config.c  | 29 ++---
 app/test-pmd/testpmd.h |  2 +-
 3 files changed, 35 insertions(+), 25 deletions(-)

diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index 35f5e4bbc002..912bf3355c10 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -174,8 +174,8 @@ static void cmd_help_long_parsed(void *parsed_result,
" by masks on port X. size is used to indicate the"
" hardware supported reta size\n\n"
 
-   "show port (port_id) rss-hash [key]\n"
-   "Display the RSS hash functions and RSS hash key of 
port\n\n"
+   "show port (port_id) rss-hash [key | algorithm]\n"
+   "Display the RSS hash functions, RSS hash key and 
RSS hash algorithms of port\n\n"
 
"clear port (info|stats|xstats|fdir) (port_id|all)\n"
"Clear information for port_id, or all.\n\n"
@@ -3026,15 +3026,17 @@ struct cmd_showport_rss_hash {
cmdline_fixed_string_t rss_hash;
cmdline_fixed_string_t rss_type;
cmdline_fixed_string_t key; /* optional argument */
+   cmdline_fixed_string_t algorithm; /* optional argument */
 };
 
 static void cmd_showport_rss_hash_parsed(void *parsed_result,
__rte_unused struct cmdline *cl,
-   void *show_rss_key)
+   __rte_unused void *data)
 {
struct cmd_showport_rss_hash *res = parsed_result;
 
-   port_rss_hash_conf_show(res->port_id, show_rss_key != NULL);
+   port_rss_hash_conf_show(res->port_id,
+   !strcmp(res->key, "key"), !strcmp(res->algorithm, "algorithm"));
 }
 
 static cmdline_parse_token_string_t cmd_showport_rss_hash_show =
@@ -3049,6 +3051,8 @@ static cmdline_parse_token_string_t 
cmd_showport_rss_hash_rss_hash =
 "rss-hash");
 static cmdline_parse_token_string_t cmd_showport_rss_hash_rss_key =
TOKEN_STRING_INITIALIZER(struct cmd_showport_rss_hash, key, "key");
+static cmdline_parse_token_string_t cmd_showport_rss_hash_rss_algo =
+   TOKEN_STRING_INITIALIZER(struct cmd_showport_rss_hash, algorithm, 
"algorithm");
 
 static cmdline_parse_inst_t cmd_showport_rss_hash = {
.f = cmd_showport_rss_hash_parsed,
@@ -3065,7 +3069,7 @@ static cmdline_parse_inst_t cmd_showport_rss_hash = {
 
 static cmdline_parse_inst_t cmd_showport_rss_hash_key = {
.f = cmd_showport_rss_hash_parsed,
-   .data = (void *)1,
+   .data = NULL,
.help_str = "show port  rss-hash key",
.tokens = {
(void *)&cmd_showport_rss_hash_show,
@@ -3077,6 +3081,20 @@ static cmdline_parse_inst_t cmd_showport_rss_hash_key = {
},
 };
 
+static cmdline_parse_inst_t cmd_showport_rss_hash_algo = {
+   .f = cmd_showport_rss_hash_parsed,
+   .data = NULL,
+   .help_str = "show port  rss-hash algorithm",
+   .tokens = {
+   (void *)&cmd_showport_rss_hash_show,
+   (void *)&cmd_showport_rss_hash_port,
+   (void *)&cmd_showport_rss_hash_port_id,
+   (void *)&cmd_showport_rss_hash_rss_hash,
+   (void *)&cmd_showport_rss_hash_rss_algo,
+   NULL,
+   },
+};
+
 /* *** Configure DCB *** */
 struct cmd_config_dcb {
cmdline_fixed_string_t port;
@@ -12969,6 +12987,7 @@ static cmdline_parse_ctx_t builtin_ctx[] = {
(cmdline_parse_inst_t *)&cmd_tunnel_udp_config,
(cmdline_parse_inst_t *)&cmd_showport_rss_hash,
(cmdline_parse_inst_t *)&cmd_showport_rss_hash_key,
+   (cmdline_parse_inst_t *)&cmd_showport_rss_hash_algo,
(cmdline_parse_inst_t *)&cmd_config_rss_hash_key,
(cmdline_parse_inst_t *)&cmd_cleanup_txq_mbufs,
(cmdline_parse_inst_t *)&cmd_dump,
diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
index b9fdb7e8f162..23fb4f8aa781 100644
--- a/app/test-pmd/config.c
+++ b/app/test-pmd/config.c
@@ -1504,24 +1504,7 @@ rss_config_display(struct rte_flow_action_rss *rss_conf)
printf(" %d", rss_conf->queue[i]);
printf("\n");
 
-   printf(" function: ");
-   switch (rss_conf->func) {
-   case RTE_ETH_HASH_FUNCTION_DEFAULT:
-   printf("default\n");
-   break;
-   case RTE_ETH_HASH_FUNCTION_TOEPLITZ:
-   printf("toeplitz\n");
-   break;
-   case RTE_ETH_HASH_FUNCTION_SIMPLE_XOR:
-   printf("simple_xor\n");
-   break;
-   case RTE_ETH_HASH_FUNCTION_SYMMETRIC_TOEPLITZ:
-   printf("symmetric_toeplitz\n");
- 

RE: [PATCH v2] net/iavf: fix coredump when exiting testpmd

2023-11-01 Thread Lu, Wenzhuo
Hi Kaiwen,

> -Original Message-
> From: Kaiwen Deng 
> Sent: Wednesday, November 1, 2023 9:35 AM
> To: dev@dpdk.org
> Cc: sta...@dpdk.org; Yang, Qiming ; Zhou, YidingX
> ; Deng, KaiwenX ; Wu,
> Jingjing ; Xing, Beilei ; Zeng,
> ZhichaoX 
> Subject: [PATCH v2] net/iavf: fix coredump when exiting testpmd
> 
> Avf releasing mbuf using the vector path release API causes a coredump
> when the basic Tx path is selected.
> This commit changes to use the basic path release API when selecting the
> basic Tx path.
Sorry, don't catch the point. 
I see you changed the code when selecting AVX2 non-offload path. Confused about 
what's " the vector path release API " and what's " the basic path release API 
".

> 
> Fixes: 22f1e7608ebc ("net/iavf: fix AVX2 Tx selection")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Kaiwen Deng 
> ---
>  drivers/net/iavf/iavf_rxtx.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c index
> 610912f635..a16e03d88c 100644
> --- a/drivers/net/iavf/iavf_rxtx.c
> +++ b/drivers/net/iavf/iavf_rxtx.c
> @@ -4022,6 +4022,7 @@ iavf_set_tx_function(struct rte_eth_dev *dev)
>   PMD_DRV_LOG(DEBUG,
>   "AVX2 does not support outer checksum 
> offload,
> using Basic Tx (port %d).",
>   dev->data->port_id);
> + return;
>   } else {
>   dev->tx_pkt_burst = 
> iavf_xmit_pkts_vec_avx2_offload;
>   dev->tx_pkt_prepare = iavf_prep_pkts;
> --
> 2.34.1



RE: [PATCH] ethdev: fix 32-bit build with GCC-13

2023-11-01 Thread Ori Kam
Hi

> -Original Message-
> From: Ruifeng Wang 
> Sent: Wednesday, November 1, 2023 9:16 AM
> 
> aarch32 build with gcc-13.0.1 generated following warning:
> 
> In function 'memcpy',
> inlined from 'rte_memcpy' at
> ../lib/eal/arm/include/rte_memcpy_32.h:296:9,
> inlined from 'rte_flow_conv_action_conf' at 
> ../lib/ethdev/rte_flow.c:726:20,
> inlined from 'rte_flow_conv_actions' at ../lib/ethdev/rte_flow.c:936:10:
> warning: '__builtin_memcpy' specified bound 4294967264 exceeds maximum
> object size 2147483647 [-Wstringop-overflow=]
> 
> The issue is due to possible wrapping in unsigned arithmetic.
> The 'size' can be 0. 'off' is 32. When 'tmp' is equal to (unsigned)-32,
> the copy length is more than half the address space. Hence the warning.
> 
> Casted variables to 64-bit to avoid wrapping.
> 
> Fixes: 063911ee1df4 ("ethdev: add flow API object converter")
> Cc: adrien.mazarg...@6wind.com
> Cc: sta...@dpdk.org
> 
> Reported-by: Luca Boccassi 
> Signed-off-by: Ruifeng Wang 
> ---
>  lib/ethdev/rte_flow.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/lib/ethdev/rte_flow.c b/lib/ethdev/rte_flow.c
> index 3a67f1aaba..2a5a057195 100644
> --- a/lib/ethdev/rte_flow.c
> +++ b/lib/ethdev/rte_flow.c
> @@ -722,7 +722,7 @@ rte_flow_conv_action_conf(void *buf, const size_t
> size,
>   if (src.rss->key_len && src.rss->key) {
>   off = RTE_ALIGN_CEIL(off, sizeof(*dst.rss->key));
>   tmp = sizeof(*src.rss->key) * src.rss->key_len;
> - if (size >= off + tmp)
> + if (size >= (uint64_t)off + (uint64_t)tmp)
>   dst.rss->key = rte_memcpy
>   ((void *)((uintptr_t)dst.rss + off),
>src.rss->key, tmp);
> @@ -731,7 +731,7 @@ rte_flow_conv_action_conf(void *buf, const size_t
> size,
>   if (src.rss->queue_num) {
>   off = RTE_ALIGN_CEIL(off, sizeof(*dst.rss->queue));
>   tmp = sizeof(*src.rss->queue) * src.rss->queue_num;
> - if (size >= off + tmp)
> + if (size >= (uint64_t)off + (uint64_t)tmp)
>   dst.rss->queue = rte_memcpy
>   ((void *)((uintptr_t)dst.rss + off),
>src.rss->queue, tmp);
> --
> 2.25.1

Acked-by: Ori Kam 
Best,
Ori


RE: [PATCH v2] common/mlx5: Optimize mlx5 mempool get extmem

2023-11-01 Thread Slava Ovsiienko
Hi,

Thank you for this optimizing patch.
My concern is this line:
> + heap = malloc(mp->size * sizeof(struct mlx5_range));
The pool size can be huge and it might cause the large memory allocation 
(on host CPU side).

What is the reason causing "hours" of registering? Reallocs per each pool 
element?
The mp struct has "struct rte_mempool_memhdr_list mem_list" member.
I think we should consider populating this list with data from
"struct rte_pktmbuf_extmem *ext_mem" on pool creation.

Because of it seems the rte_mempool_mem_iter() functionality is
completely broken for the pools with external memory, and that's why
mlx5 implemented the dedicated branch to handle their registration.

With best regards,
Slava

> -Original Message-
> From: Aaron Conole 
> Sent: Tuesday, October 10, 2023 5:38 PM
> To: dev@dpdk.org
> Cc: John Romein ; Raslan Darawsheh
> ; Elena Agostini ; Dmitry
> Kozlyuk ; Matan Azrad ; Slava
> Ovsiienko ; Ori Kam ;
> Suanming Mou 
> Subject: [PATCH v2] common/mlx5: Optimize mlx5 mempool get extmem
> 
> From: John Romein 
> 
> This patch reduces the time to allocate and register tens of gigabytes of GPU
> memory from hours to seconds, by sorting the heap only once instead of for
> each object in the mempool.
> 
> Fixes: 690b2a88c2f7 ("common/mlx5: add mempool registration facilities")
> 
> Signed-off-by: John Romein 
> ---
>  drivers/common/mlx5/mlx5_common_mr.c | 69 
>  1 file changed, 20 insertions(+), 49 deletions(-)
> 
> diff --git a/drivers/common/mlx5/mlx5_common_mr.c
> b/drivers/common/mlx5/mlx5_common_mr.c
> index 40ff9153bd..77b66e444b 100644
> --- a/drivers/common/mlx5/mlx5_common_mr.c
> +++ b/drivers/common/mlx5/mlx5_common_mr.c
> @@ -1389,63 +1389,23 @@ mlx5_mempool_get_chunks(struct
> rte_mempool *mp, struct mlx5_range **out,
>   return 0;
>  }
> 
> -struct mlx5_mempool_get_extmem_data {
> - struct mlx5_range *heap;
> - unsigned int heap_size;
> - int ret;
> -};
> -
>  static void
>  mlx5_mempool_get_extmem_cb(struct rte_mempool *mp, void *opaque,
>  void *obj, unsigned int obj_idx)
>  {
> - struct mlx5_mempool_get_extmem_data *data = opaque;
> + struct mlx5_range *heap = opaque;
>   struct rte_mbuf *mbuf = obj;
>   uintptr_t addr = (uintptr_t)mbuf->buf_addr;
> - struct mlx5_range *seg, *heap;
>   struct rte_memseg_list *msl;
>   size_t page_size;
>   uintptr_t page_start;
> - unsigned int pos = 0, len = data->heap_size, delta;
> 
>   RTE_SET_USED(mp);
> - RTE_SET_USED(obj_idx);
> - if (data->ret < 0)
> - return;
> - /* Binary search for an already visited page. */
> - while (len > 1) {
> - delta = len / 2;
> - if (addr < data->heap[pos + delta].start) {
> - len = delta;
> - } else {
> - pos += delta;
> - len -= delta;
> - }
> - }
> - if (data->heap != NULL) {
> - seg = &data->heap[pos];
> - if (seg->start <= addr && addr < seg->end)
> - return;
> - }
> - /* Determine the page boundaries and remember them. */
> - heap = realloc(data->heap, sizeof(heap[0]) * (data->heap_size + 1));
> - if (heap == NULL) {
> - free(data->heap);
> - data->heap = NULL;
> - data->ret = -1;
> - return;
> - }
> - data->heap = heap;
> - data->heap_size++;
> - seg = &heap[data->heap_size - 1];
>   msl = rte_mem_virt2memseg_list((void *)addr);
>   page_size = msl != NULL ? msl->page_sz : rte_mem_page_size();
>   page_start = RTE_PTR_ALIGN_FLOOR(addr, page_size);
> - seg->start = page_start;
> - seg->end = page_start + page_size;
> - /* Maintain the heap order. */
> - qsort(data->heap, data->heap_size, sizeof(heap[0]),
> -   mlx5_range_compare_start);
> + heap[obj_idx].start = page_start;
> + heap[obj_idx].end = page_start + page_size;
>  }
> 
>  /**
> @@ -1457,15 +1417,26 @@ static int
>  mlx5_mempool_get_extmem(struct rte_mempool *mp, struct mlx5_range
> **out,
>   unsigned int *out_n)
>  {
> - struct mlx5_mempool_get_extmem_data data;
> + unsigned int out_size = 1;
> + struct mlx5_range *heap;
> 
>   DRV_LOG(DEBUG, "Recovering external pinned pages of mempool
> %s",
>   mp->name);
> - memset(&data, 0, sizeof(data));
> - rte_mempool_obj_iter(mp, mlx5_mempool_get_extmem_cb,
> &data);
> - *out = data.heap;
> - *out_n = data.heap_size;
> - return data.ret;
> + heap = malloc(mp->size * sizeof(struct mlx5_range));
> + if (heap == NULL)
> + return -1;
> + rte_mempool_obj_iter(mp, mlx5_mempool_get_extmem_cb, heap);
> + qsort(heap, mp->size, sizeof(heap[0]), mlx5_range_compare_start);
> + /* remove duplicates */
> + for (unsigned int i = 1; i < mp->size; i++)
> + if (heap[out_

RE: [PATCH v2] crypto/qat: add sm2 ecdsa

2023-11-01 Thread Power, Ciara
Hi Arek,

> -Original Message-
> From: Kusztal, ArkadiuszX 
> Sent: Tuesday, October 31, 2023 9:27 PM
> To: dev@dpdk.org
> Cc: gak...@marvell.com; Ji, Kai ; Power, Ciara
> ; Kusztal, ArkadiuszX
> 
> Subject: [PATCH v2] crypto/qat: add sm2 ecdsa
> 
> Added SM2 ECDSA feature to the Intel QuickAssist Technology symmetric
> crypto PMD.
> 
> Signed-off-by: Arkadiusz Kusztal 
> ---
> v2:
> - fixed build issues

Acked-by: Ciara Power 


RE: [PATCH v2] net/iavf: fix coredump when exiting testpmd

2023-11-01 Thread Deng, KaiwenX



> -Original Message-
> From: Lu, Wenzhuo 
> Sent: Wednesday, November 1, 2023 3:53 PM
> To: Deng, KaiwenX ; dev@dpdk.org
> Cc: sta...@dpdk.org; Yang, Qiming ; Zhou, YidingX
> ; Deng, KaiwenX ; Wu,
> Jingjing ; Xing, Beilei ; Zeng,
> ZhichaoX 
> Subject: RE: [PATCH v2] net/iavf: fix coredump when exiting testpmd
> 
> Hi Kaiwen,
> 
> > -Original Message-
> > From: Kaiwen Deng 
> > Sent: Wednesday, November 1, 2023 9:35 AM
> > To: dev@dpdk.org
> > Cc: sta...@dpdk.org; Yang, Qiming ; Zhou,
> > YidingX ; Deng, KaiwenX
> > ; Wu, Jingjing ; Xing,
> > Beilei ; Zeng, ZhichaoX
> > 
> > Subject: [PATCH v2] net/iavf: fix coredump when exiting testpmd
> >
> > Avf releasing mbuf using the vector path release API causes a coredump
> > when the basic Tx path is selected.
> > This commit changes to use the basic path release API when selecting
> > the basic Tx path.
> Sorry, don't catch the point.
> I see you changed the code when selecting AVX2 non-offload path. Confused
> about what's " the vector path release API " and what's " the basic path
> release API ".
> 
Hi Wenzhuo:

Thanks for your review.
According to the code below.
"the vector path release API" is iavf_tx_queue_release_mbufs_sse.
"the basic path release API" is release_txq_mbufs.

static const
struct iavf_txq_ops iavf_txq_release_mbufs_ops[] = {
[IAVF_REL_MBUFS_DEFAULT].release_mbufs = release_txq_mbufs,
#ifdef RTE_ARCH_X86
[IAVF_REL_MBUFS_SSE_VEC].release_mbufs = 
iavf_tx_queue_release_mbufs_sse,
#ifdef CC_AVX512_SUPPORT
[IAVF_REL_MBUFS_AVX512_VEC].release_mbufs = 
iavf_tx_queue_release_mbufs_avx512,
#endif
> >
> > Fixes: 22f1e7608ebc ("net/iavf: fix AVX2 Tx selection")
> > Cc: sta...@dpdk.org
> >
> > Signed-off-by: Kaiwen Deng 
> > ---
> >  drivers/net/iavf/iavf_rxtx.c | 1 +
> >  1 file changed, 1 insertion(+)
> >
> > diff --git a/drivers/net/iavf/iavf_rxtx.c
> > b/drivers/net/iavf/iavf_rxtx.c index 610912f635..a16e03d88c 100644
> > --- a/drivers/net/iavf/iavf_rxtx.c
> > +++ b/drivers/net/iavf/iavf_rxtx.c
> > @@ -4022,6 +4022,7 @@ iavf_set_tx_function(struct rte_eth_dev *dev)
> > PMD_DRV_LOG(DEBUG,
> > "AVX2 does not support outer
> checksum offload, using Basic Tx
> > (port %d).",
> > dev->data->port_id);
> > +   return;
> > } else {
> > dev->tx_pkt_burst =
> iavf_xmit_pkts_vec_avx2_offload;
> > dev->tx_pkt_prepare = iavf_prep_pkts;
> > --
> > 2.34.1



Re: [PATCH v8 00/10] support setting and querying RSS algorithms

2023-11-01 Thread fengchengwen
LGTM
Series-acked-by: Chengwen Feng 


On 2023/11/1 15:40, Jie Hai wrote:
> This patchset is to support setting and querying RSS algorithms.
> For this purpose, field "rss_algo_capa" is added to ``rte_eth_dev_info``
> and field "algorithm" is added to ``rte_eth_rss_conf``.
> The drivers should reports their "rss_algo_capa" if they support
> updating RSS algorithms. Otherwise, the "rss_algo_capa" is set to
> RTE_ETH_HASH_ALGO_CAPA_MASK(DEFAULT). The app configures RSS algorithms
> by field "algorithm" and the related API should verify "algorithm" with
> "rss_algo_capa".
> 
> --
> v8:
> 1. rewrite some comments.
> 2. add check for rss_key_len in ethdev level.
> 3. add Acked-by: Huisong Li .
> 4. fix log on RSS hash algorithm.
> 5. add rte_eth_dev_rss_algo_name to lib/ethdev/version.map.
> 6. fix RSS algorithm display on testpmd.
> 
> v7:
> 1. fix compile error.
> 2. add signed-off-by to patch[4/9].
> v6:
> 1. rewrite some comments.
> 2. add "rss_algo_capa" for `rte_eth_dev_info``.
> 3. add new API to get name of RSS algorithms
> 
> v5:
> 1. rewrite some comments.
> 2. check RSS algorithm for drivers supporting RSS.
> 3. change field "func" of rss_conf to "algorithm".
> 4. fix commit log for [PATCH v4 4/7].
> 5. add Acked-by Reshma Pattan.
> 6. add symmetric_toeplitz_sort for showing.
> 7. change "hf" to "hash function" for showing.
> 
> v4:
> 1. recomment some definitions related to RSS.
> 2. allocate static memory for rss_key instead of dynamic.
> 3. use array of strings to get the name of rss algorithm.
> 4. add display of rss algorithm with testpmd.
> 
> v3:
> 1. fix commit log for PATCH [1/5].
> 2. make RSS ABI changes description to start the actual text at the margin.
> 3. move defnition of enum rte_eth_hash_function to rte_ethdev.h.
> 4. fix some comment codes.
> 
> v2:
> 1. return error if "func" is invalid.
> 2. modify the comments of the "func" field.
> 3. modify commit log of patch [3/5].
> 4. use malloc instead of rte_malloc.
> 5. adjust display format of RSS info.
> 6. remove the string display of rss_hf.
> 
> Huisong Li (1):
>   net/hns3: support setting and querying RSS hash function
> 
> Jie Hai (9):
>   ethdev: overwrite some comment related to RSS
>   lib/ethdev: check RSS key length
>   ethdev: support setting and querying RSS algorithm
>   net/hns3: report RSS hash algorithms capability
>   app/proc-info: fix never show RSS info
>   app/proc-info: adjust the display format of RSS info
>   ethdev: add API to get RSS algorithm names
>   app/proc-info: support querying RSS hash algorithm
>   app/testpmd: add RSS hash algorithms display
> 
>  app/proc-info/main.c   | 24 +---
>  app/test-pmd/cmdline.c | 29 +++--
>  app/test-pmd/config.c  | 29 -
>  app/test-pmd/testpmd.h |  2 +-
>  doc/guides/rel_notes/release_23_11.rst |  9 +++
>  drivers/net/hns3/hns3_common.c |  4 ++
>  drivers/net/hns3/hns3_rss.c| 47 ---
>  lib/ethdev/rte_ethdev.c| 82 ++
>  lib/ethdev/rte_ethdev.h| 79 -
>  lib/ethdev/rte_flow.c  |  1 -
>  lib/ethdev/rte_flow.h  | 25 +---
>  lib/ethdev/version.map |  1 +
>  12 files changed, 239 insertions(+), 93 deletions(-)
> 


Re: [PATCH v8 02/10] lib/ethdev: check RSS key length

2023-11-01 Thread lihuisong (C)



在 2023/11/1 15:40, Jie Hai 写道:

In rte_eth_dev_rss_hash_conf_get(), the "rss_key_len" should be
greater than or equal to the "hash_key_size" which get from
rte_eth_dev_info_get() API. And the "rss_key" should contain at
least "hash_key_size" bytes. If these requirements are not met,
the query unreliable.

In rte_eth_dev_rss_hash_update() or rte_eth_dev_configure(), the
"rss_key_len" indicates the length of the "rss_key" in bytes of
the array pointed by "rss_key", it should be equal to the
"hash_key_size" if "rss_key" is not NULL.

This patch checks "rss_key_len" in ethdev level.

Signed-off-by: Jie Hai 
---
  lib/ethdev/rte_ethdev.c | 31 +++
  1 file changed, 31 insertions(+)

diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index af23ac0ad00f..07bb35833ba6 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -1500,6 +1500,16 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t 
nb_rx_q, uint16_t nb_tx_q,
goto rollback;
}
  
+	if (dev_conf->rx_adv_conf.rss_conf.rss_key != NULL &&

+   dev_conf->rx_adv_conf.rss_conf.rss_key_len < 
dev_info.hash_key_size) {

dev_conf->rx_adv_conf.rss_conf.rss_key_len != dev_info.hash_key_size, right?
otherwise, this isn't inconsistent with the comments in patch 1.

+   RTE_ETHDEV_LOG(ERR,
+   "Ethdev port_id=%u invalid RSS key len: %u, valid value: 
%u\n",
+   port_id, dev_conf->rx_adv_conf.rss_conf.rss_key_len,
+   dev_info.hash_key_size);
+   ret = -EINVAL;
+   goto rollback;
+   }
+
/*
 * Setup new number of Rx/Tx queues and reconfigure device.
 */
@@ -4698,6 +4708,14 @@ rte_eth_dev_rss_hash_update(uint16_t port_id,
return -ENOTSUP;
}
  
+	if (rss_conf->rss_key != NULL &&

+   rss_conf->rss_key_len != dev_info.hash_key_size) {
+   RTE_ETHDEV_LOG(ERR,
+   "Ethdev port_id=%u invalid RSS key len: %u, valid value: 
%u\n",
+   port_id, rss_conf->rss_key_len, dev_info.hash_key_size);
+   return -EINVAL;
+   }
+
if (*dev->dev_ops->rss_hash_update == NULL)
return -ENOTSUP;
ret = eth_err(port_id, (*dev->dev_ops->rss_hash_update)(dev,
@@ -4712,6 +4730,7 @@ int
  rte_eth_dev_rss_hash_conf_get(uint16_t port_id,
  struct rte_eth_rss_conf *rss_conf)
  {
+   struct rte_eth_dev_info dev_info = { 0 };
struct rte_eth_dev *dev;
int ret;
  
@@ -4725,6 +4744,18 @@ rte_eth_dev_rss_hash_conf_get(uint16_t port_id,

return -EINVAL;
}
  
+	ret = rte_eth_dev_info_get(port_id, &dev_info);

+   if (ret != 0)
+   return ret;
+
+   if (rss_conf->rss_key != NULL &&
+   rss_conf->rss_key_len < dev_info.hash_key_size) {
+   RTE_ETHDEV_LOG(ERR,
+   "Ethdev port_id=%u invalid RSS key len: %u, should not be 
less than: %u\n",
+   port_id, rss_conf->rss_key_len, dev_info.hash_key_size);
+   return -EINVAL;
+   }
+
if (*dev->dev_ops->rss_hash_conf_get == NULL)
return -ENOTSUP;
ret = eth_err(port_id, (*dev->dev_ops->rss_hash_conf_get)(dev,


Re: [PATCH v8 09/10] app/proc-info: support querying RSS hash algorithm

2023-11-01 Thread lihuisong (C)

lgtm,
Acked-by: Huisong Li 

在 2023/11/1 15:40, Jie Hai 写道:

Display RSS hash algorithm with command show-port as below.
   - RSS info
  -- hash algorithm : toeplitz

Signed-off-by: Jie Hai 
Signed-off-by: Dongdong Liu 
Acked-by: Reshma Pattan 
---
  app/proc-info/main.c | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/app/proc-info/main.c b/app/proc-info/main.c
index 4c577fa417fd..b672aaefbe99 100644
--- a/app/proc-info/main.c
+++ b/app/proc-info/main.c
@@ -1184,6 +1184,8 @@ show_port(void)
printf("%02x", rss_conf.rss_key[k]);
printf("\n\t  -- hash function : 0x%"PRIx64"\n",
rss_conf.rss_hf);
+   printf("\t  -- hash algorithm : %s\n",
+   rte_eth_dev_rss_algo_name(rss_conf.algorithm));
}
  
  #ifdef RTE_LIB_SECURITY


Re: [PATCH v8 01/10] ethdev: overwrite some comment related to RSS

2023-11-01 Thread lihuisong (C)

lgtm,
Acked-by: Huisong Li 

在 2023/11/1 15:40, Jie Hai 写道:

1. overwrite the comments of fields of 'rte_eth_rss_conf'.
2. Add comments for RTE_ETH_HASH_FUNCTION_DEFAULT.

Signed-off-by: Jie Hai 
---
  lib/ethdev/rte_ethdev.h | 34 +++---
  lib/ethdev/rte_flow.h   |  1 +
  2 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
index a53dd5a1efec..343a134fdd12 100644
--- a/lib/ethdev/rte_ethdev.h
+++ b/lib/ethdev/rte_ethdev.h
@@ -448,24 +448,28 @@ struct rte_vlan_filter_conf {
  /**
   * A structure used to configure the Receive Side Scaling (RSS) feature
   * of an Ethernet port.
- * If not NULL, the *rss_key* pointer of the *rss_conf* structure points
- * to an array holding the RSS key to use for hashing specific header
- * fields of received packets. The length of this array should be indicated
- * by *rss_key_len* below. Otherwise, a default random hash key is used by
- * the device driver.
- *
- * The *rss_key_len* field of the *rss_conf* structure indicates the length
- * in bytes of the array pointed by *rss_key*. To be compatible, this length
- * will be checked in i40e only. Others assume 40 bytes to be used as before.
- *
- * The *rss_hf* field of the *rss_conf* structure indicates the different
- * types of IPv4/IPv6 packets to which the RSS hashing must be applied.
- * Supplying an *rss_hf* equal to zero disables the RSS feature.
   */
  struct rte_eth_rss_conf {
-   uint8_t *rss_key;/**< If not NULL, 40-byte hash key. */
+   /**
+* In rte_eth_dev_rss_hash_conf_get(), the *rss_key_len* should be
+* greater than or equal to the *hash_key_size* which get from
+* rte_eth_dev_info_get() API. And the *rss_key* should contain at least
+* *hash_key_size* bytes. If not meet these requirements, the query
+* result is unreliable even if the operation returns success.
+*
+* In rte_eth_dev_rss_hash_update() or rte_eth_dev_configure(), if
+* *rss_key* is not NULL, the *rss_key_len* indicates the length of the
+* *rss_key* in bytes of the array pointed by *rss_key*, and it should
+* be equal to *hash_key_size*. Otherwise, drivers are free to use a
+* random or a default key or to ignore this configuration.
+*/
+   uint8_t *rss_key;
uint8_t rss_key_len; /**< hash key length in bytes. */
-   uint64_t rss_hf; /**< Hash functions to apply - see below. */
+   /**
+* Indicates the type of packets or the specific part of packets to
+* which RSS hashing is to be applied.
+*/
+   uint64_t rss_hf;
  };
  
  /*

diff --git a/lib/ethdev/rte_flow.h b/lib/ethdev/rte_flow.h
index c16fe8c21f2f..751c29a0f3f3 100644
--- a/lib/ethdev/rte_flow.h
+++ b/lib/ethdev/rte_flow.h
@@ -3226,6 +3226,7 @@ struct rte_flow_query_count {
   * Hash function types.
   */
  enum rte_eth_hash_function {
+   /** DEFAULT means driver decides which hash algorithm to pick. */
RTE_ETH_HASH_FUNCTION_DEFAULT = 0,
RTE_ETH_HASH_FUNCTION_TOEPLITZ, /**< Toeplitz */
RTE_ETH_HASH_FUNCTION_SIMPLE_XOR, /**< Simple XOR */


[PATCH] test/dma: fix for buffer auto free

2023-11-01 Thread Amit Prakash Shukla
Buffer auto free test failed for more than 1 dma device as the device
initialization for the test was been done only for the first dma device.
This changeset fixes the same.

Fixes: 877cb3e37426 ("dmadev: add buffer auto free offload")

Signed-off-by: Amit Prakash Shukla 
---
 app/test/test_dmadev.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/app/test/test_dmadev.c b/app/test/test_dmadev.c
index 216f84b6bb..3d4cb37ee6 100644
--- a/app/test/test_dmadev.c
+++ b/app/test/test_dmadev.c
@@ -49,6 +49,8 @@ struct dma_add_test dma_add_test[] = {
[TEST_M2D_AUTO_FREE] = {.name = "m2d_auto_free", .enabled = false},
 };
 
+static bool dev_init;
+
 static void
 __rte_format_printf(3, 4)
 print_err(const char *func, int lineno, const char *format, ...)
@@ -837,7 +839,6 @@ test_m2d_auto_free(int16_t dev_id, uint16_t vchan)
};
uint32_t buf_cnt1, buf_cnt2;
struct rte_mempool_ops *ops;
-   static bool dev_init;
uint16_t nb_done = 0;
bool dma_err = false;
int retry = 100;
@@ -1011,6 +1012,7 @@ test_dmadev_instance(int16_t dev_id)
 
if ((info.dev_capa & RTE_DMA_CAPA_M2D_AUTO_FREE) &&
dma_add_test[TEST_M2D_AUTO_FREE].enabled == true) {
+   dev_init = false;
if (runtest("m2d_auto_free", test_m2d_auto_free, 128, dev_id, 
vchan,
CHECK_ERRS) < 0)
goto err;
-- 
2.25.1



[PATCH v3] net/ice: fix crash on closing representor ports

2023-11-01 Thread Mingjin Ye
The data resource in struct rte_eth_dev is cleared and points to NULL
when the DCF port is closed.

If the DCF representor port is closed after the DCF port is closed,
a segmentation fault occurs because the representor port accesses
the data resource released by the DCF port.

This patch checks if the resource is present before accessing.

Fixes: 5674465a32c8 ("net/ice: add DCF VLAN handling")
Fixes: da9cdcd1f372 ("net/ice: fix crash on representor port closing")
Cc: sta...@dpdk.org

Signed-off-by: Mingjin Ye 
---
v3: New solution.
---
 drivers/net/ice/ice_dcf_vf_representor.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ice/ice_dcf_vf_representor.c 
b/drivers/net/ice/ice_dcf_vf_representor.c
index b9fcfc80ad..8c45e28f02 100644
--- a/drivers/net/ice/ice_dcf_vf_representor.c
+++ b/drivers/net/ice/ice_dcf_vf_representor.c
@@ -111,14 +111,16 @@ ice_dcf_vf_repr_link_update(__rte_unused struct 
rte_eth_dev *ethdev,
 static __rte_always_inline struct ice_dcf_hw *
 ice_dcf_vf_repr_hw(struct ice_dcf_vf_repr *repr)
 {
-   struct ice_dcf_adapter *dcf_adapter =
-   repr->dcf_eth_dev->data->dev_private;
+   struct rte_eth_dev_data *dcf_data = repr->dcf_eth_dev->data;
+   struct ice_dcf_adapter *dcf_adapter;
 
-   if (!dcf_adapter) {
+   if (!dcf_data || !dcf_data->dev_private) {
PMD_DRV_LOG(ERR, "DCF for VF representor has been released\n");
return NULL;
}
 
+   dcf_adapter = dcf_data->dev_private;
+
return &dcf_adapter->real_hw;
 }
 
-- 
2.25.1



RE: [PATCH v3] net/ice: fix crash on closing representor ports

2023-11-01 Thread Zhang, Qi Z



> -Original Message-
> From: Ye, MingjinX 
> Sent: Wednesday, November 1, 2023 6:14 PM
> To: dev@dpdk.org
> Cc: Yang, Qiming ; Zhou, YidingX
> ; Ye, MingjinX ;
> sta...@dpdk.org; Zhang, Qi Z 
> Subject: [PATCH v3] net/ice: fix crash on closing representor ports
> 
> The data resource in struct rte_eth_dev is cleared and points to NULL when
> the DCF port is closed.
> 
> If the DCF representor port is closed after the DCF port is closed, a
> segmentation fault occurs because the representor port accesses the data
> resource released by the DCF port.
> 
> This patch checks if the resource is present before accessing.
> 
> Fixes: 5674465a32c8 ("net/ice: add DCF VLAN handling")
> Fixes: da9cdcd1f372 ("net/ice: fix crash on representor port closing")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Mingjin Ye 
> ---
> v3: New solution.
> ---
>  drivers/net/ice/ice_dcf_vf_representor.c | 8 +---
>  1 file changed, 5 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/net/ice/ice_dcf_vf_representor.c
> b/drivers/net/ice/ice_dcf_vf_representor.c
> index b9fcfc80ad..8c45e28f02 100644
> --- a/drivers/net/ice/ice_dcf_vf_representor.c
> +++ b/drivers/net/ice/ice_dcf_vf_representor.c
> @@ -111,14 +111,16 @@ ice_dcf_vf_repr_link_update(__rte_unused struct
> rte_eth_dev *ethdev,  static __rte_always_inline struct ice_dcf_hw *
> ice_dcf_vf_repr_hw(struct ice_dcf_vf_repr *repr)  {
> - struct ice_dcf_adapter *dcf_adapter =
> - repr->dcf_eth_dev->data->dev_private;
> + struct rte_eth_dev_data *dcf_data = repr->dcf_eth_dev->data;

Seems this expose another issue, if dcf port already be closed, the dcf_eth_dev 
instance could already be reused by another driver.
So we can't assume dcf_eth_dev->data is NULL,  I think you can refine based on 
v2's method, but don't update dcf_valid flag in representor port's dev_stop.



> + struct ice_dcf_adapter *dcf_adapter;
> 
> - if (!dcf_adapter) {
> + if (!dcf_data || !dcf_data->dev_private) {
>   PMD_DRV_LOG(ERR, "DCF for VF representor has been
> released\n");
>   return NULL;
>   }
> 
> + dcf_adapter = dcf_data->dev_private;
> +
>   return &dcf_adapter->real_hw;
>  }
> 
> --
> 2.25.1



RE: [PATCH v2] net/iavf: fix coredump when exiting testpmd

2023-11-01 Thread Zhang, Qi Z



> -Original Message-
> From: Kaiwen Deng 
> Sent: Wednesday, November 1, 2023 9:35 AM
> To: dev@dpdk.org
> Cc: sta...@dpdk.org; Yang, Qiming ; Zhou, YidingX
> ; Deng, KaiwenX ; Wu,
> Jingjing ; Xing, Beilei ; Zeng,
> ZhichaoX 
> Subject: [PATCH v2] net/iavf: fix coredump when exiting testpmd

please remove testpmd from the title if this issue is not specific to testpmd.

You can always add reproduce step with testpmd in commit log.

> 
> Avf releasing mbuf using the vector path release API causes a coredump
> when the basic Tx path is selected.
> This commit changes to use the basic path release API when selecting the
> basic Tx path.
> 
> Fixes: 22f1e7608ebc ("net/iavf: fix AVX2 Tx selection")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Kaiwen Deng 
> ---
>  drivers/net/iavf/iavf_rxtx.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c index
> 610912f635..a16e03d88c 100644
> --- a/drivers/net/iavf/iavf_rxtx.c
> +++ b/drivers/net/iavf/iavf_rxtx.c
> @@ -4022,6 +4022,7 @@ iavf_set_tx_function(struct rte_eth_dev *dev)
>   PMD_DRV_LOG(DEBUG,
>   "AVX2 does not support outer
> checksum offload, using Basic Tx (port %d).",
>   dev->data->port_id);
> + return;
>   } else {
>   dev->tx_pkt_burst =
> iavf_xmit_pkts_vec_avx2_offload;
>   dev->tx_pkt_prepare = iavf_prep_pkts;
> --
> 2.34.1



Re: [PATCH v2] doc: update matching list for i40e and ice driver

2023-11-01 Thread Kevin Traynor

On 11/11/2022 05:27, Qiming Yang wrote:

Add recommended matching list for ice PMD in DPDK 22.07 and
i40e PMD in DPDK 22.07 and 22.11.

Signed-off-by: Qiming Yang 
---
  doc/guides/nics/i40e.rst | 8 
  doc/guides/nics/ice.rst  | 2 ++
  2 files changed, 10 insertions(+)

diff --git a/doc/guides/nics/i40e.rst b/doc/guides/nics/i40e.rst
index a0992dbc6c..a6c7dbd080 100644
--- a/doc/guides/nics/i40e.rst
+++ b/doc/guides/nics/i40e.rst
@@ -101,6 +101,10 @@ For X710/XL710/XXV710,
 +--+---+--+
 | DPDK version | Kernel driver version | Firmware version |
 +==+===+==+
+   |22.11 | 2.20.12   |   9.01   |
+   +--+---+--+
+   |22.07 | 2.19.3|   8.70   |
+   +--+---+--+
 |22.03 | 2.17.15   |   8.30   |
 +--+---+--+
 |21.11 | 2.17.4|   8.30   |
@@ -156,6 +160,10 @@ For X722,
 +--+---+--+
 | DPDK version | Kernel driver version | Firmware version |
 +==+===+==+
+   |22.11 | 2.20.12   |   6.00   |
+   +--+---+--+
+   |22.07 | 2.19.3|   5.60   |
+   +--+---+--+
 |22.03 | 2.17.15   |   5.50   |
 +--+---+--+
 |21.11 | 2.17.4|   5.30   |



The text above the table says:

"It is highly recommended to upgrade the i40e kernel driver and firmware 
to avoid the compatibility issues with i40e PMD. Here is the suggested 
matching list which has been tested and verified. The detailed 
information can refer to chapter Tested Platforms/Tested NICs in release 
notes."


The table is only showing the out-of-tree Kernel driver versions. The 
tested section shows that in-tree Kernel drivers are tested with as well.


The issue is that this section says "Here is the suggested matching 
list" and only lists out-of-tree drivers.


It is probably just a left over from when in-tree drivers had version 
numbers but it is causing some confusion for users as it implies that 
the Intel recommendation is to use out-of-tree drivers and not in-tree.


So I suggest that another column is added here to also show the in-tree 
kernel driver tested with.


What do you think ?


diff --git a/doc/guides/nics/ice.rst b/doc/guides/nics/ice.rst
index c7f82c261d..ce075e067c 100644
--- a/doc/guides/nics/ice.rst
+++ b/doc/guides/nics/ice.rst
@@ -64,6 +64,8 @@ The detailed information can refer to chapter Tested 
Platforms/Tested NICs in re
 
+---+---+-+---+--+---+
 |22.03  | 1.8.3 |  1.3.28 |  1.3.35   |1.3.8 | 
   3.2|
 
+---+---+-+---+--+---+
+   |22.07  | 1.9.11|  1.3.30 |  1.3.37   |1.3.10|  
  4.0|
+   
+---+---+-+---+--+---+
  
  Pre-Installation Configuration

  --




Re: [PATCH] net/nfp: reduce space reserved for layer 2 overhead

2023-11-01 Thread Ferruh Yigit
On 10/28/2023 7:26 AM, Chaoyong He wrote:
> From: James Hershaw 
> 
> Reduce the space reserved for layer 2 overhead by defining
> NFP_ETH_OVERHEAD.
> 
> Previously, the overhead was not explicitly defined, only the
> NFP_FRAME_SIZE_MAX value and the maximum layer 3 MTU was read from
> hardware, which is set by firmware.
> 
> This resulted in a massive overhead, 516 Bytes in most cases, and while
> this can hold useful metadata in some cases, for the most part is not
> necessary. As such the overhead is explicitly defined in line with other
> net PMDs and the maximum frame size is calculated based on this and the
> layer 3 MTU read from firmware.
> 
> Signed-off-by: James Hershaw 
> Reviewed-by: Chaoyong He >

Applied to dpdk-next-net/main, thanks.


[PATCH v4 0/4] add pointer compression API

2023-11-01 Thread Paul Szczepanek
This patchset is proposing adding a new EAL header with utility functions
that allow compression of arrays of pointers.

When passing caches full of pointers between threads, memory containing
the pointers is copied multiple times which is especially costly between
cores. A compression method will allow us to shrink the memory size
copied.

The compression takes advantage of the fact that pointers are usually
located in a limited memory region (like a mempool). We can compress them
by converting them to offsets from a base memory address.

Offsets can be stored in fewer bytes (dictated by the memory region size
and alignment of the pointer). For example: an 8 byte aligned pointer
which is part of a 32GB memory pool can be stored in 4 bytes. The API is
very generic and does not assume mempool pointers, any pointer can be
passed in.

Compression is based on few and fast operations and especially with vector
instructions leveraged creates minimal overhead.

The API accepts and returns arrays because the overhead means it only is
worth it when done in bulk.

Test is added that shows potential performance gain from compression. In
this test an array of pointers is passed through a ring between two cores.
It shows the gain which is dependent on the bulk operation size. In this
synthetic test run on ampere altra a substantial (up to 25%) performance
gain is seen if done in bulk size larger than 32. At 32 it breaks even and
lower sizes create a small (less than 5%) slowdown due to overhead.

In a more realistic mock application running the l3 forwarding dpdk
example that works in pipeline mode on two cores this translated into a
~5% throughput increase on an ampere altra.

v2:
* addressed review comments (style, explanations and typos)
* lowered bulk iterations closer to original numbers to keep runtime short
* fixed pointer size warning on 32-bit arch
v3:
* added 16-bit versions of compression functions and tests
* added documentation of these new utility functions in the EAL guide
v4:
* added unit test
* fix bug in NEON implementation of 32-bit decompress

Paul Szczepanek (4):
  eal: add pointer compression functions
  test: add pointer compress tests to ring perf test
  docs: add pointer compression to the EAL guide
  test: add unit test for ptr compression

 .mailmap  |   1 +
 app/test/meson.build  |   1 +
 app/test/test_eal_ptr_compress.c  | 108 ++
 app/test/test_ring.h  |  94 -
 app/test/test_ring_perf.c | 354 --
 .../prog_guide/env_abstraction_layer.rst  | 142 +++
 lib/eal/include/meson.build   |   1 +
 lib/eal/include/rte_ptr_compress.h| 266 +
 8 files changed, 843 insertions(+), 124 deletions(-)
 create mode 100644 app/test/test_eal_ptr_compress.c
 create mode 100644 lib/eal/include/rte_ptr_compress.h

--
2.25.1



[PATCH v4 2/4] test: add pointer compress tests to ring perf test

2023-11-01 Thread Paul Szczepanek
Add a test that runs a zero copy burst enqueue and dequeue on a ring
of raw pointers and compressed pointers at different burst sizes to
showcase performance benefits of newly added pointer compression APIs.

Refactored threading code to pass more parameters to threads to
reuse existing code. Added more bulk sizes to showcase their effects
on compression. Adjusted loop iteration numbers to take into account
bulk sizes to keep runtime constant (instead of number of operations).

Adjusted old printfs to match new ones which have aligned numbers.

Signed-off-by: Paul Szczepanek 
Reviewed-by: Honnappa Nagarahalli 
---
 app/test/test_ring.h  |  94 +-
 app/test/test_ring_perf.c | 354 +-
 2 files changed, 324 insertions(+), 124 deletions(-)

diff --git a/app/test/test_ring.h b/app/test/test_ring.h
index 45c263f3ff..3b00f2465d 100644
--- a/app/test/test_ring.h
+++ b/app/test/test_ring.h
@@ -1,10 +1,12 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2019 Arm Limited
+ * Copyright(c) 2019-2023 Arm Limited
  */

 #include 
 #include 
 #include 
+#include 
+#include 

 /* API type to call
  * rte_ring__enqueue_
@@ -25,6 +27,10 @@
 #define TEST_RING_ELEM_BULK 16
 #define TEST_RING_ELEM_BURST 32

+#define TEST_RING_ELEM_BURST_ZC 64
+#define TEST_RING_ELEM_BURST_ZC_COMPRESS_PTR_16 128
+#define TEST_RING_ELEM_BURST_ZC_COMPRESS_PTR_32 256
+
 #define TEST_RING_IGNORE_API_TYPE ~0U

 /* This function is placed here as it is required for both
@@ -101,6 +107,9 @@ static inline unsigned int
 test_ring_enqueue(struct rte_ring *r, void **obj, int esize, unsigned int n,
unsigned int api_type)
 {
+   unsigned int ret;
+   struct rte_ring_zc_data zcd = {0};
+
/* Legacy queue APIs? */
if (esize == -1)
switch (api_type) {
@@ -152,6 +161,46 @@ test_ring_enqueue(struct rte_ring *r, void **obj, int 
esize, unsigned int n,
case (TEST_RING_THREAD_MPMC | TEST_RING_ELEM_BURST):
return rte_ring_mp_enqueue_burst_elem(r, obj, esize, n,
NULL);
+   case (TEST_RING_ELEM_BURST_ZC):
+   ret = rte_ring_enqueue_zc_burst_elem_start(
+   r, esize, n, &zcd, NULL);
+   if (unlikely(ret == 0))
+   return 0;
+   rte_memcpy(zcd.ptr1, (char *)obj, zcd.n1 * esize);
+   if (unlikely(zcd.ptr2 != NULL))
+   rte_memcpy(zcd.ptr2,
+   (char *)obj + zcd.n1 * esize,
+   (ret - zcd.n1) * esize);
+   rte_ring_enqueue_zc_finish(r, ret);
+   return ret;
+   case (TEST_RING_ELEM_BURST_ZC_COMPRESS_PTR_16):
+   /* rings cannot store uint16_t so we use a uint32_t
+* and half the requested number of elements
+* and compensate by doubling the returned numbers
+*/
+   ret = rte_ring_enqueue_zc_burst_elem_start(
+   r, sizeof(uint32_t), n / 2, &zcd, NULL);
+   if (unlikely(ret == 0))
+   return 0;
+   rte_ptr_compress_16(0, obj, zcd.ptr1, zcd.n1 * 2, 3);
+   if (unlikely(zcd.ptr2 != NULL))
+   rte_ptr_compress_16(0,
+   obj + (zcd.n1 * 2),
+   zcd.ptr2,
+   (ret - zcd.n1) * 2, 3);
+   rte_ring_enqueue_zc_finish(r, ret);
+   return ret * 2;
+   case (TEST_RING_ELEM_BURST_ZC_COMPRESS_PTR_32):
+   ret = rte_ring_enqueue_zc_burst_elem_start(
+   r, sizeof(uint32_t), n, &zcd, NULL);
+   if (unlikely(ret == 0))
+   return 0;
+   rte_ptr_compress_32(0, obj, zcd.ptr1, zcd.n1, 3);
+   if (unlikely(zcd.ptr2 != NULL))
+   rte_ptr_compress_32(0, obj + zcd.n1,
+   zcd.ptr2, ret - zcd.n1, 3);
+   rte_ring_enqueue_zc_finish(r, ret);
+   return ret;
default:
printf("Invalid API type\n");
return 0;
@@ -162,6 +211,9 @@ static inline unsigned int
 test_ring_dequeue(struct rte_ring *r, void **obj, int esize, unsigned int n,
unsigned int api_type)
 {
+   unsigned int ret;
+   struct rte_ring_zc_data zcd = {0};
+
/* Legacy queue APIs? */
if (esize == -1)

[PATCH v4 4/4] test: add unit test for ptr compression

2023-11-01 Thread Paul Szczepanek
Test compresses and decompresses pointers with various combinations
of memory regions and alignments and verifies the pointers are
recovered correctly.

Signed-off-by: Paul Szczepanek 
---
 app/test/meson.build |   1 +
 app/test/test_eal_ptr_compress.c | 108 +++
 2 files changed, 109 insertions(+)
 create mode 100644 app/test/test_eal_ptr_compress.c

diff --git a/app/test/meson.build b/app/test/meson.build
index 05bae9216d..753de4bbd3 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -61,6 +61,7 @@ source_file_deps = {
 'test_dmadev_api.c': ['dmadev'],
 'test_eal_flags.c': [],
 'test_eal_fs.c': [],
+'test_eal_ptr_compress.c': [],
 'test_efd.c': ['efd', 'net'],
 'test_efd_perf.c': ['efd', 'hash'],
 'test_errno.c': [],
diff --git a/app/test/test_eal_ptr_compress.c b/app/test/test_eal_ptr_compress.c
new file mode 100644
index 00..c1c9a98be7
--- /dev/null
+++ b/app/test/test_eal_ptr_compress.c
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include "test.h"
+#include 
+#include 
+
+#include 
+
+#define MAX_ALIGN_EXPONENT 3
+#define PTRS_SIZE 16
+#define NUM_BASES 2
+#define NUM_REGIONS 4
+#define MAX_32BIT_REGION ((uint64_t)UINT32_MAX + 1)
+#define MAX_16BIT_REGION (UINT16_MAX + 1)
+
+static int
+test_eal_ptr_compress_params(
+   void *base,
+   uint64_t mem_sz,
+   unsigned int align_exp,
+   unsigned int num_ptrs,
+   bool use_32_bit)
+{
+   unsigned int i;
+   unsigned int align = 1 << align_exp;
+   void *ptrs[PTRS_SIZE] = {0};
+   void *ptrs_out[PTRS_SIZE] = {0};
+   uint32_t offsets32[PTRS_SIZE] = {0};
+   uint16_t offsets16[PTRS_SIZE] = {0};
+
+   for (i = 0; i < num_ptrs; i++) {
+   /* make pointers point at memory in steps of align */
+   /* alternate steps from the start and end of memory region */
+   if ((i & 1) == 1)
+   ptrs[i] = (char *)base + mem_sz - i * align;
+   else
+   ptrs[i] = (char *)base + i * align;
+   }
+
+   if (use_32_bit) {
+   rte_ptr_compress_32(base, ptrs, offsets32, num_ptrs, align_exp);
+   rte_ptr_decompress_32(base, offsets32, ptrs_out, num_ptrs,
+   align_exp);
+   } else {
+   rte_ptr_compress_16(base, ptrs, offsets16, num_ptrs, align_exp);
+   rte_ptr_decompress_16(base, offsets16, ptrs_out, num_ptrs,
+   align_exp);
+   }
+
+   TEST_ASSERT_BUFFERS_ARE_EQUAL(ptrs, ptrs_out, sizeof(void *) * num_ptrs,
+   "Decompressed pointers corrupted\nbase pointer: %p, "
+   "memory region size: %" PRIu64 ", alignment exponent: %u, "
+   "num of pointers: %u, using %s offsets",
+   base, mem_sz, align_exp, num_ptrs,
+   use_32_bit ? "32-bit" : "16-bit");
+
+   return 0;
+}
+
+static int
+test_eal_ptr_compress(void)
+{
+   unsigned int j, k, n;
+   int ret = 0;
+   void * const bases[NUM_BASES] = { (void *)0, (void *)UINT16_MAX };
+   /* maximum size for pointers aligned by consecutive powers of 2 */
+   const uint64_t region_sizes_16[NUM_REGIONS] = {
+   MAX_16BIT_REGION,
+   MAX_16BIT_REGION * 2,
+   MAX_16BIT_REGION * 4,
+   MAX_16BIT_REGION * 8,
+   };
+   const uint64_t region_sizes_32[NUM_REGIONS] = {
+   MAX_32BIT_REGION,
+   MAX_32BIT_REGION * 2,
+   MAX_32BIT_REGION * 4,
+   MAX_32BIT_REGION * 8,
+   };
+
+   for (j = 0; j < NUM_REGIONS; j++) {
+   for (k = 0; k < NUM_BASES; k++) {
+   for (n = 1; n < PTRS_SIZE; n++) {
+   ret |= test_eal_ptr_compress_params(
+   bases[k],
+   region_sizes_16[j],
+   j /* exponent of alignment */,
+   n,
+   false
+   );
+   ret |= test_eal_ptr_compress_params(
+   bases[k],
+   region_sizes_32[j],
+   j /* exponent of alignment */,
+   n,
+   true
+   );
+   if (ret != 0)
+   return ret;
+   }
+   }
+   }
+
+   return ret;
+}
+
+REGISTER_FAST_TEST(eal_ptr_compress_autotest, true, true, 
test_eal_ptr_compress);
--
2.25.1



[PATCH v4 1/4] eal: add pointer compression functions

2023-11-01 Thread Paul Szczepanek
Add a new utility header for compressing pointers. The provided
functions can store pointers in 32-bit offsets.

The compression takes advantage of the fact that pointers are
usually located in a limited memory region (like a mempool).
We can compress them by converting them to offsets from a base
memory address. Offsets can be stored in fewer bytes (dictated
by the memory region size and alignment of the pointer).
For example: an 8 byte aligned pointer which is part of a 32GB
memory pool can be stored in 4 bytes.

Suggested-by: Honnappa Nagarahalli 
Signed-off-by: Paul Szczepanek 
Signed-off-by: Kamalakshitha Aligeri 
Reviewed-by: Honnappa Nagarahalli 
---
 .mailmap   |   1 +
 lib/eal/include/meson.build|   1 +
 lib/eal/include/rte_ptr_compress.h | 266 +
 3 files changed, 268 insertions(+)
 create mode 100644 lib/eal/include/rte_ptr_compress.h

diff --git a/.mailmap b/.mailmap
index 864d33ee46..3f0c9d32f5 100644
--- a/.mailmap
+++ b/.mailmap
@@ -1058,6 +1058,7 @@ Paul Greenwalt 
 Paulis Gributs 
 Paul Luse 
 Paul M Stillwell Jr 
+Paul Szczepanek 
 Pavan Kumar Linga 
 Pavan Nikhilesh  
 Pavel Belous 
diff --git a/lib/eal/include/meson.build b/lib/eal/include/meson.build
index a0463efac7..17d8373648 100644
--- a/lib/eal/include/meson.build
+++ b/lib/eal/include/meson.build
@@ -36,6 +36,7 @@ headers += files(
 'rte_pci_dev_features.h',
 'rte_per_lcore.h',
 'rte_pflock.h',
+   'rte_ptr_compress.h',
 'rte_random.h',
 'rte_reciprocal.h',
 'rte_seqcount.h',
diff --git a/lib/eal/include/rte_ptr_compress.h 
b/lib/eal/include/rte_ptr_compress.h
new file mode 100644
index 00..6697385113
--- /dev/null
+++ b/lib/eal/include/rte_ptr_compress.h
@@ -0,0 +1,266 @@
+/* SPDX-License-Identifier: BSD-shift-Clause
+ * Copyright(c) 2023 Arm Limited
+ */
+
+#ifndef RTE_PTR_COMPRESS_H
+#define RTE_PTR_COMPRESS_H
+
+/**
+ * @file
+ * Pointer compression and decompression functions.
+ *
+ * When passing arrays full of pointers between threads, memory containing
+ * the pointers is copied multiple times which is especially costly between
+ * cores. These functions allow us to compress the pointers.
+ *
+ * Compression takes advantage of the fact that pointers are usually located in
+ * a limited memory region (like a mempool). We compress them by converting 
them
+ * to offsets from a base memory address. Offsets can be stored in fewer bytes.
+ *
+ * The compression functions come in two varieties: 32-bit and 16-bit.
+ *
+ * To determine how many bits are needed to compress the pointer calculate
+ * the biggest offset possible (highest value pointer - base pointer)
+ * and shift the value right according to alignment (shift by exponent of the
+ * power of 2 of alignment: aligned by 4 - shift by 2, aligned by 8 - shift by
+ * 3, etc.). The resulting value must fit in either 32 or 16 bits.
+ *
+ * For usage example and further explanation please see "Pointer Compression" 
in
+ * doc/guides/prog_guide/env_abstraction_layer.rst
+ */
+
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Compress pointers into 32-bit offsets from base pointer.
+ *
+ * @note It is programmer's responsibility to ensure the resulting offsets fit
+ * into 32 bits. Alignment of the structures pointed to by the pointers allows
+ * us to drop bits from the offsets. This is controlled by the bit_shift
+ * parameter. This means that if structures are aligned by 8 bytes they must be
+ * within 32GB of the base pointer. If there is no such alignment guarantee 
they
+ * must be within 4GB.
+ *
+ * @param ptr_base
+ *   A pointer used to calculate offsets of pointers in src_table.
+ * @param src_table
+ *   A pointer to an array of pointers.
+ * @param dest_table
+ *   A pointer to an array of compressed pointers returned by this function.
+ * @param n
+ *   The number of objects to compress, must be strictly positive.
+ * @param bit_shift
+ *   Byte alignment of memory pointed to by the pointers allows for
+ *   bits to be dropped from the offset and hence widen the memory region that
+ *   can be covered. This controls how many bits are right shifted.
+ **/
+static __rte_always_inline void
+rte_ptr_compress_32(void *ptr_base, void **src_table,
+   uint32_t *dest_table, unsigned int n, unsigned int bit_shift)
+{
+   unsigned int i = 0;
+#if defined RTE_HAS_SVE_ACLE
+   svuint64_t v_ptr_table;
+   svbool_t pg = svwhilelt_b64(i, n);
+   do {
+   v_ptr_table = svld1_u64(pg, (uint64_t *)src_table + i);
+   v_ptr_table = svsub_x(pg, v_ptr_table, (uint64_t)ptr_base);
+   v_ptr_table = svlsr_x(pg, v_ptr_table, bit_shift);
+   svst1w(pg, &dest_table[i], v_ptr_table);
+   i += svcntd();
+   pg = svwhilelt_b64(i, n);
+   } while (svptest_any(svptrue_b64(), pg));
+#elif defined __ARM_NEON

[PATCH v4 3/4] docs: add pointer compression to the EAL guide

2023-11-01 Thread Paul Szczepanek
Documentation added in the EAL guide for the new
utility functions for pointer compression
showing example code and potential usecases

Signed-off-by: Paul Szczepanek 
Reviewed-by: Honnappa Nagarahalli 
---
 .../prog_guide/env_abstraction_layer.rst  | 142 ++
 1 file changed, 142 insertions(+)

diff --git a/doc/guides/prog_guide/env_abstraction_layer.rst 
b/doc/guides/prog_guide/env_abstraction_layer.rst
index 89014789de..cc56784e3d 100644
--- a/doc/guides/prog_guide/env_abstraction_layer.rst
+++ b/doc/guides/prog_guide/env_abstraction_layer.rst
@@ -1192,3 +1192,145 @@ will not be deallocated.

 Any successful deallocation event will trigger a callback, for which user
 applications and other DPDK subsystems can register.
+
+.. _pointer_compression:
+
+Pointer Compression
+---
+
+Use ``rte_ptr_compress_16()`` and ``rte_ptr_decompress_16()`` to compress and
+decompress pointers into 16-bit offsets. Use ``rte_ptr_compress_32()`` and
+``rte_ptr_decompress_32()`` to compress and decompress pointers into 32-bit
+offsets.
+
+Compression takes advantage of the fact that pointers are usually located in a
+limited memory region (like a mempool). By converting them to offsets from a
+base memory address they can be stored in fewer bytes. How many bytes are 
needed
+to store the offset is dictated by the memory region size and alignment of
+objects the pointers point to.
+
+For example, a pointer which is part of a 4GB memory pool can be stored as 32
+bit offset. If the pointer points to memory that is 8 bytes aligned then 3 bits
+can be dropped from the offset and a 32GB memory pool can now fit in 32 bits.
+
+For performance reasons these requirements are not enforced programmatically.
+The programmer is responsible for ensuring that the combination of distance
+from the base pointer and memory alignment allow for storing of the offset in
+the number of bits indicated by the function name (16 or 32). Start of mempool
+memory would be a good candidate for the base pointer. Otherwise any pointer
+that precedes all pointers, is close enough and has the same alignment as the
+pointers being compressed will work.
+
+.. note::
+
+Performance gains depend on the batch size of pointers and CPU capabilities
+such as vector extensions. It's important to measure the performance
+increase on target hardware. A test called ``ring_perf_autotest`` in
+``dpdk-test`` can provide the measurements.
+
+Example usage
+~
+
+In this example we send pointers between two cores through a ring. While this
+is a realistic use case the code is simplified for demonstration purposes and
+does not have error handling.
+
+.. code-block:: c
+
+#include 
+#include 
+#include 
+#include 
+
+#define ITEMS_ARRAY_SIZE (1024)
+#define BATCH_SIZE (128)
+#define ALIGN_EXPONENT (3)
+#define ITEM_ALIGN (1<

Re: [PATCH v3 0/3] add pointer compression API

2023-11-01 Thread Paul Szczepanek

On 01/11/2023 07:42, Morten Brørup wrote:

From: Paul Szczepanek [mailto:paul.szczepa...@arm.com]
Sent: Tuesday, 31 October 2023 19.11

[...]


In a more realistic mock application running the l3 forwarding dpdk
example that works in pipeline mode this translated into a ~5%
throughput
increase on an ampere altra.

What was the bulk size in this test?

And were the pipeline stages running on the same lcore or individual lcores per 
pipeline stage?



The pipeline mode was run on separate cores and used 128 as the bulk size.



Re: [PATCH v7 1/9] ethdev: overwrite some comment related to RSS

2023-11-01 Thread Ferruh Yigit
On 10/28/2023 3:41 AM, lihuisong (C) wrote:
> 
> 在 2023/10/28 9:46, Jie Hai 写道:
>> 1. overwrite the comments of fields of 'rte_eth_rss_conf'.
>> 2. Add comments for RTE_ETH_HASH_FUNCTION_DEFAULT.
>>
>> Signed-off-by: Jie Hai 
>> ---
>>   lib/ethdev/rte_ethdev.h | 33 ++---
>>   lib/ethdev/rte_flow.h   |  1 +
>>   2 files changed, 19 insertions(+), 15 deletions(-)
>>
>> diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
>> index 2fd3cd808dbf..37fd5afef48a 100644
>> --- a/lib/ethdev/rte_ethdev.h
>> +++ b/lib/ethdev/rte_ethdev.h
>> @@ -448,24 +448,27 @@ struct rte_vlan_filter_conf {
>>   /**
>>    * A structure used to configure the Receive Side Scaling (RSS) feature
>>    * of an Ethernet port.
>> - * If not NULL, the *rss_key* pointer of the *rss_conf* structure points
>> - * to an array holding the RSS key to use for hashing specific header
>> - * fields of received packets. The length of this array should be
>> indicated
>> - * by *rss_key_len* below. Otherwise, a default random hash key is
>> used by
>> - * the device driver.
>> - *
>> - * The *rss_key_len* field of the *rss_conf* structure indicates the
>> length
>> - * in bytes of the array pointed by *rss_key*. To be compatible, this
>> length
>> - * will be checked in i40e only. Others assume 40 bytes to be used as
>> before.
>> - *
>> - * The *rss_hf* field of the *rss_conf* structure indicates the
>> different
>> - * types of IPv4/IPv6 packets to which the RSS hashing must be applied.
>> - * Supplying an *rss_hf* equal to zero disables the RSS feature.
>>    */
>>   struct rte_eth_rss_conf {
>> -    uint8_t *rss_key;    /**< If not NULL, 40-byte hash key. */
>> +    /**
>> + * In rte_eth_dev_rss_hash_conf_get(), the *rss_key_len* should be
>> + * greater than or equal to the hash_key_size which get from
>>
> Is this added the new comment?
> Suggest that the "rss_key_len" field should be equal to the
> hash_key_size from dev_info_get().
> Because many PMDs, like, hns3, i40e, ice and mlx5, check it in driver
> based on the condition that the rss_key_len field must be equal to the
> hash_key_size when the rss_key is not NULL.
> IMO, it is better that this check should be added in ethdev layer.
>

+1 to add check in ethdev layer


>> + * rte_eth_dev_info_get() API. And the *rss_key* should contain
>> at least
>> + * *rss_key_len* bytes. If not meet these requirements, the query
>> result
> 
> here "rss_key_len" should be "hash_key_size", right?
> 
>> + * is unreliable even the operation returns success.
> s/even/even if
>> + *
>> + * In rte_eth_dev_rss_hash_update() or rte_eth_dev_configure(), the
>> + * *rss_key_len* indicates the length of the *rss_key* in bytes of
>> + * the array pointed by *rss_key*. Drivers are free to ignore the
>> + * *rss_key_len* and assume key length is 40 bytes.
>>
> please modify the comment: "Drivers are free to ignore the *rss_key_len*
> and assume key length is 40 bytes. "
> Actually, most of PMDs, like, hns3, i40e, mlx, do not configure RSS hash
> key if rss_key is NULL and treat it as not updating the hash key.
>

This is not for the case where rss_key is NULL, most driver use
hardcoded 40 bytes as rss key len, so they don't check the rss_key_len,
only some drivers use not 40 bytes length, that is why above comment is
correct I think.


>> + */
>> +    uint8_t *rss_key;
>>   uint8_t rss_key_len; /**< hash key length in bytes. */
>> -    uint64_t rss_hf; /**< Hash functions to apply - see below. */
>> +    /**
>> + * Indicates the type of packets or the specific part of packets to
>> + * which RSS hashing is to be applied.
>> + */
>> +    uint64_t rss_hf;
>>   };
>>     /*
>> diff --git a/lib/ethdev/rte_flow.h b/lib/ethdev/rte_flow.h
>> index edefa34c10da..25f1dffd1f30 100644
>> --- a/lib/ethdev/rte_flow.h
>> +++ b/lib/ethdev/rte_flow.h
>> @@ -3226,6 +3226,7 @@ struct rte_flow_query_count {
>>    * Hash function types.
>>    */
>>   enum rte_eth_hash_function {
>> +    /** DEFAULT means driver decides which hash algorithm to pick. */
>>   RTE_ETH_HASH_FUNCTION_DEFAULT = 0,
>>   RTE_ETH_HASH_FUNCTION_TOEPLITZ, /**< Toeplitz */
>>   RTE_ETH_HASH_FUNCTION_SIMPLE_XOR, /**< Simple XOR */



Re: [PATCH v7 2/9] ethdev: support setting and querying RSS algorithm

2023-11-01 Thread Ferruh Yigit
On 10/28/2023 4:01 AM, lihuisong (C) wrote:
> With belows to changes,
> Acked-by: Huisong Li 
> 
> 
> 在 2023/10/28 9:46, Jie Hai 写道:
>> Currently, rte_eth_rss_conf supports configuring and querying
>> RSS hash functions, rss key and it's length, but not RSS hash
>> algorithm.
>>
>> The structure ``rte_eth_dev_info`` is extended by adding a new
>> field "rss_algo_capa". Drivers are responsible for reporting this
>> capa and configurations of RSS hash algorithm can be verified based
>> on the capability. The default value of "rss_algo_capa" is
>> RTE_ETH_HASH_ALGO_CAPA_MASK(DEFAULT) if drivers do not report it.
>>
>> The structure ``rte_eth_rss_conf`` is extended by adding a new
>> field "algorithm". This represents the RSS algorithms to apply.
>> If the value of "algorithm" used for configuration is a gibberish
>> value, drivers should report the error.
>>
>> To check whether the drivers report valid "algorithm", it is set
>> to default value before querying in rte_eth_dev_rss_hash_conf_get().
>>
>> Signed-off-by: Jie Hai 
>> Signed-off-by: Dongdong Liu 
>> ---
>>   doc/guides/rel_notes/release_23_11.rst |  5 
>>   lib/ethdev/rte_ethdev.c    | 26 
>>   lib/ethdev/rte_ethdev.h    | 33 +-
>>   lib/ethdev/rte_flow.c  |  1 -
>>   lib/ethdev/rte_flow.h  | 26 ++--
>>   5 files changed, 65 insertions(+), 26 deletions(-)
>>
>> diff --git a/doc/guides/rel_notes/release_23_11.rst
>> b/doc/guides/rel_notes/release_23_11.rst
>> index 0a6fc76a9d02..a35d729d2cc7 100644
>> --- a/doc/guides/rel_notes/release_23_11.rst
>> +++ b/doc/guides/rel_notes/release_23_11.rst
>> @@ -360,6 +360,11 @@ ABI Changes
>>   * security: struct ``rte_security_ipsec_sa_options`` was updated
>>     due to inline out-of-place feature addition.
>>   +* ethdev: Added "rss_algo_capa" field to ``rte_eth_dev_info``
>> structure for
>> +* reporting RSS hash algorithm capability.
>> +
>> +* ethdev: Added "algorithm" field to ``rte_eth_rss_conf`` structure
>> for RSS
>> +  hash algorithm.
>>     Known Issues
>>   
>> diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
>> index 9dabcb5ae28e..90bfbf14d1f7 100644
>> --- a/lib/ethdev/rte_ethdev.c
>> +++ b/lib/ethdev/rte_ethdev.c
>> @@ -1269,6 +1269,7 @@ int
>>   rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t
>> nb_tx_q,
>>     const struct rte_eth_conf *dev_conf)
>>   {
>> +    enum rte_eth_hash_function algorithm;
>>   struct rte_eth_dev *dev;
>>   struct rte_eth_dev_info dev_info;
>>   struct rte_eth_conf orig_conf;
>> @@ -1500,6 +1501,18 @@ rte_eth_dev_configure(uint16_t port_id,
>> uint16_t nb_rx_q, uint16_t nb_tx_q,
>>   goto rollback;
>>   }
>>   +    algorithm = dev_conf->rx_adv_conf.rss_conf.algorithm;
>> +    if ((dev_info.rss_algo_capa &
>> + RTE_ETH_HASH_ALGO_TO_CAPA(algorithm)) == 0) {
> need to check the algorithm.
> its value should be in range of 0 to 31.
>> +    RTE_ETHDEV_LOG(ERR,
>> +    "Ethdev port_id=%u config unsupported RSS hash algorithm:
>> %u "
>> +    "with rss_algo_capa: %x\n",
> It seems that this log is not friendly to user.
> Configured RSS hash algorithm (%u) is not in the algorithm capability ().
> Anything ok like that.
> 
> %x --> 0x%" PRIx32 "
>> +    port_id, algorithm,
>> +    dev_info.rss_algo_capa);
>> +    ret = -EINVAL;
>> +    goto rollback;
>> +    }
>> +
>>   /*
>>    * Setup new number of Rx/Tx queues and reconfigure device.
>>    */
>> @@ -3757,6 +3770,7 @@ rte_eth_dev_info_get(uint16_t port_id, struct
>> rte_eth_dev_info *dev_info)
>>   dev_info->min_mtu = RTE_ETHER_MIN_LEN - RTE_ETHER_HDR_LEN -
>>   RTE_ETHER_CRC_LEN;
>>   dev_info->max_mtu = UINT16_MAX;
>> +    dev_info->rss_algo_capa = RTE_ETH_HASH_ALGO_CAPA_MASK(DEFAULT);
>>     if (*dev->dev_ops->dev_infos_get == NULL)
>>   return -ENOTSUP;
>> @@ -4698,6 +4712,16 @@ rte_eth_dev_rss_hash_update(uint16_t port_id,
>>   return -ENOTSUP;
>>   }
>>   +    if ((dev_info.rss_algo_capa &
>> + RTE_ETH_HASH_ALGO_TO_CAPA(rss_conf->algorithm)) == 0) {
>> +    RTE_ETHDEV_LOG(ERR,
>> +    "Ethdev port_id=%u config unsupported RSS hash algorithm:
>> %u "
>> +    "with rss_algo_capa: %x\n",
>> +    port_id, rss_conf->algorithm,
>> +    dev_info.rss_algo_capa);
>> +    return -EINVAL;
>> +    }
>> +
>>   if (*dev->dev_ops->rss_hash_update == NULL)
>>   return -ENOTSUP;
>>   ret = eth_err(port_id, (*dev->dev_ops->rss_hash_update)(dev,
>> @@ -4725,6 +4749,8 @@ rte_eth_dev_rss_hash_conf_get(uint16_t port_id,
>>   return -EINVAL;
>>   }
>>   +    rss_conf->algorithm = RTE_ETH_HASH_FUNCTION_DEFAULT;
>> +
>>   if (*dev->dev_ops->rss_hash_conf_get == NULL)
>>   return -ENOTSUP;
>>   ret = eth_err(port_id, (*dev->dev_ops->rss_hash_conf_get)(dev

Re: [PATCH v3] config/arm: update aarch32 build with gcc13

2023-11-01 Thread Paul Szczepanek



On 25/10/2023 13:57, Juraj Linkeš wrote:

The aarch32 with gcc13 fails with:

Compiler for C supports arguments -march=armv8-a: NO

../config/arm/meson.build:714:12: ERROR: Problem encountered: No
suitable armv8 march version found.

This is because we test -march=armv8-a alone (without the -mpfu option),
which is no longer supported in gcc13 aarch32 builds.

The most recent recommendation from the compiler team is to build with
-march=armv8-a+simd -mfpu=auto, which should work for compilers old and
new. The suggestion is to first check -march=armv8-a+simd and only then
check -mfpu=auto.

To address this, add a way to force the architecture (the value of
the -march option).

Signed-off-by: Juraj Linkeš 
---
  config/arm/meson.build | 40 +++-
  1 file changed, 23 insertions(+), 17 deletions(-)

diff --git a/config/arm/meson.build b/config/arm/meson.build
index 3f22d8a2fc..c3f763764a 100644
--- a/config/arm/meson.build
+++ b/config/arm/meson.build
@@ -43,7 +43,9 @@ implementer_generic = {
  },
  'generic_aarch32': {
  'march': 'armv8-a',
-'compiler_options': ['-mfpu=neon'],
+'force_march': true,
+'march_features': ['simd'],
+'compiler_options': ['-mfpu=auto'],
  'flags': [
  ['RTE_ARCH_ARM_NEON_MEMCPY', false],
  ['RTE_ARCH_STRICT_ALIGN', true],
@@ -695,21 +697,25 @@ if update_flags
  # probe supported archs and their features
  candidate_march = ''
  if part_number_config.has_key('march')
-supported_marchs = ['armv8.6-a', 'armv8.5-a', 'armv8.4-a', 'armv8.3-a',
-'armv8.2-a', 'armv8.1-a', 'armv8-a']
-check_compiler_support = false
-foreach supported_march: supported_marchs
-if supported_march == part_number_config['march']
-# start checking from this version downwards
-check_compiler_support = true
-endif
-if (check_compiler_support and
-cc.has_argument('-march=' + supported_march))
-candidate_march = supported_march
-# highest supported march version found
-break
-endif
-endforeach
+if part_number_config.get('force_march', false)
+candidate_march = part_number_config['march']
+else
+supported_marchs = ['armv8.6-a', 'armv8.5-a', 'armv8.4-a', 
'armv8.3-a',
+'armv8.2-a', 'armv8.1-a', 'armv8-a']
+check_compiler_support = false
+foreach supported_march: supported_marchs
+if supported_march == part_number_config['march']
+# start checking from this version downwards
+check_compiler_support = true
+endif
+if (check_compiler_support and
+cc.has_argument('-march=' + supported_march))
+candidate_march = supported_march
+# highest supported march version found
+break
+endif
+endforeach
+endif
  if candidate_march == ''
  error('No suitable armv8 march version found.')
  endif
@@ -741,7 +747,7 @@ if update_flags
  # apply supported compiler options
  if part_number_config.has_key('compiler_options')
  foreach flag: part_number_config['compiler_options']
-if cc.has_argument(flag)
+if cc.has_multi_arguments(machine_args + [flag])
  machine_args += flag
  else
  warning('Configuration compiler option ' +



Reviewed-by: Paul Szczepanek 



Re: [PATCH v8 01/10] ethdev: overwrite some comment related to RSS

2023-11-01 Thread Ferruh Yigit
On 11/1/2023 7:40 AM, Jie Hai wrote:
> 1. overwrite the comments of fields of 'rte_eth_rss_conf'.
> 2. Add comments for RTE_ETH_HASH_FUNCTION_DEFAULT.
> 
> Signed-off-by: Jie Hai 
> ---
>  lib/ethdev/rte_ethdev.h | 34 +++---
>  lib/ethdev/rte_flow.h   |  1 +
>  2 files changed, 20 insertions(+), 15 deletions(-)
> 
> diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
> index a53dd5a1efec..343a134fdd12 100644
> --- a/lib/ethdev/rte_ethdev.h
> +++ b/lib/ethdev/rte_ethdev.h
> @@ -448,24 +448,28 @@ struct rte_vlan_filter_conf {
>  /**
>   * A structure used to configure the Receive Side Scaling (RSS) feature
>   * of an Ethernet port.
> - * If not NULL, the *rss_key* pointer of the *rss_conf* structure points
> - * to an array holding the RSS key to use for hashing specific header
> - * fields of received packets. The length of this array should be indicated
> - * by *rss_key_len* below. Otherwise, a default random hash key is used by
> - * the device driver.
> - *
> - * The *rss_key_len* field of the *rss_conf* structure indicates the length
> - * in bytes of the array pointed by *rss_key*. To be compatible, this length
> - * will be checked in i40e only. Others assume 40 bytes to be used as before.
> - *
> - * The *rss_hf* field of the *rss_conf* structure indicates the different
> - * types of IPv4/IPv6 packets to which the RSS hashing must be applied.
> - * Supplying an *rss_hf* equal to zero disables the RSS feature.
>   */
>  struct rte_eth_rss_conf {
> - uint8_t *rss_key;/**< If not NULL, 40-byte hash key. */
> + /**
> +  * In rte_eth_dev_rss_hash_conf_get(), the *rss_key_len* should be
> +  * greater than or equal to the *hash_key_size* which get from
> +  * rte_eth_dev_info_get() API. And the *rss_key* should contain at least
> +  * *hash_key_size* bytes. If not meet these requirements, the query
> +  * result is unreliable even if the operation returns success.
> +  *
> +  * In rte_eth_dev_rss_hash_update() or rte_eth_dev_configure(), if
> +  * *rss_key* is not NULL, the *rss_key_len* indicates the length of the
> +  * *rss_key* in bytes of the array pointed by *rss_key*, 
>

I think it is sufficient to say "length of the *rss_key* in bytes".


>  and it should
> +  * be equal to *hash_key_size*. 
>

I don't know if we missed something here, first driver reports key size
via 'rte_eth_dev_info_get()::hash_key_size', later other APIs require
'rss_key_len' parameter that should be same as 'hash_key_size', as
driver already know this parameter why we are requesting it back from
the application?


>>  Otherwise, drivers are free to use a
> +  * random or a default key or to ignore this configuration.
> +  */
>


I guess above clause describes when 'rss_key' is null, can you please
clarify it as following, perhaps with a line break:
"If *rss_key* is NULL, drivers are free to use a random or a default key."


For the "Drivers are free to ignore the *rss_key_len* and assume key
length is 40 bytes." part, as checks in ethdev layer now forces
application to provide 'rss_key_len' as 'hash_key_size', I think we can
remove above, as application will provide 40 bytes when it is the case.

My concern is this check now can break some applications, because
'rss_key_len' wasn't mandatory previously, but it became now.


Overall it becomes:
"
In rte_eth_dev_rss_hash_update() or rte_eth_dev_configure(), if
*rss_key* is not NULL, the *rss_key_len* indicates the length of the
*rss_key* in bytes and it should be equal to *hash_key_size*.
If *rss_key* is NULL, drivers are free to use a random or a default key.
"



> + uint8_t *rss_key;
>   uint8_t rss_key_len; /**< hash key length in bytes. */
> - uint64_t rss_hf; /**< Hash functions to apply - see below. */
> + /**
> +  * Indicates the type of packets or the specific part of packets to
> +  * which RSS hashing is to be applied.
> +  */
> + uint64_t rss_hf;
>  };
>  
>  /*
> diff --git a/lib/ethdev/rte_flow.h b/lib/ethdev/rte_flow.h
> index c16fe8c21f2f..751c29a0f3f3 100644
> --- a/lib/ethdev/rte_flow.h
> +++ b/lib/ethdev/rte_flow.h
> @@ -3226,6 +3226,7 @@ struct rte_flow_query_count {
>   * Hash function types.
>   */
>  enum rte_eth_hash_function {
> + /** DEFAULT means driver decides which hash algorithm to pick. */
>   RTE_ETH_HASH_FUNCTION_DEFAULT = 0,
>   RTE_ETH_HASH_FUNCTION_TOEPLITZ, /**< Toeplitz */
>   RTE_ETH_HASH_FUNCTION_SIMPLE_XOR, /**< Simple XOR */



Re: [PATCH v8 02/10] lib/ethdev: check RSS key length

2023-11-01 Thread Ferruh Yigit
On 11/1/2023 7:40 AM, Jie Hai wrote:
> In rte_eth_dev_rss_hash_conf_get(), the "rss_key_len" should be
> greater than or equal to the "hash_key_size" which get from
> rte_eth_dev_info_get() API. And the "rss_key" should contain at
> least "hash_key_size" bytes. If these requirements are not met,
> the query unreliable.
> 
> In rte_eth_dev_rss_hash_update() or rte_eth_dev_configure(), the
> "rss_key_len" indicates the length of the "rss_key" in bytes of
> the array pointed by "rss_key", it should be equal to the
> "hash_key_size" if "rss_key" is not NULL.
> 
> This patch checks "rss_key_len" in ethdev level.
> 

Can you please squash this patch and previous one, previous one
clarifies the API and this one adds relevant checks, so they con be in
some patch.

Can you also please update release notes, 'API Changes', explaining
'rss_conf.rss_key_len' needs to be provided by user for the case
"conf.rss_key != NULL", it won't be taken as default 40 bytes anymore.


> Signed-off-by: Jie Hai 
> ---
>  lib/ethdev/rte_ethdev.c | 31 +++
>  1 file changed, 31 insertions(+)
> 
> diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
> index af23ac0ad00f..07bb35833ba6 100644
> --- a/lib/ethdev/rte_ethdev.c
> +++ b/lib/ethdev/rte_ethdev.c
> @@ -1500,6 +1500,16 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t 
> nb_rx_q, uint16_t nb_tx_q,
>   goto rollback;
>   }
>  
> + if (dev_conf->rx_adv_conf.rss_conf.rss_key != NULL &&
> + dev_conf->rx_adv_conf.rss_conf.rss_key_len < 
> dev_info.hash_key_size) {
>

Why check is "rss_key_len < dev_info.hash_key_size", is it allowed to
have "rss_key_len > dev_info.hash_key_size"?

Shouldn't it enforce that "rss_key_len == dev_info.hash_key_size"?


> + RTE_ETHDEV_LOG(ERR,
> + "Ethdev port_id=%u invalid RSS key len: %u, valid 
> value: %u\n",
> + port_id, dev_conf->rx_adv_conf.rss_conf.rss_key_len,
> + dev_info.hash_key_size);
> + ret = -EINVAL;
> + goto rollback;
> + }
> +
>   /*
>* Setup new number of Rx/Tx queues and reconfigure device.
>*/
> @@ -4698,6 +4708,14 @@ rte_eth_dev_rss_hash_update(uint16_t port_id,
>   return -ENOTSUP;
>   }
>  
> + if (rss_conf->rss_key != NULL &&
> + rss_conf->rss_key_len != dev_info.hash_key_size) {
> + RTE_ETHDEV_LOG(ERR,
> + "Ethdev port_id=%u invalid RSS key len: %u, valid 
> value: %u\n",
> + port_id, rss_conf->rss_key_len, dev_info.hash_key_size);
> + return -EINVAL;
> + }
> +
>   if (*dev->dev_ops->rss_hash_update == NULL)
>   return -ENOTSUP;
>   ret = eth_err(port_id, (*dev->dev_ops->rss_hash_update)(dev,
> @@ -4712,6 +4730,7 @@ int
>  rte_eth_dev_rss_hash_conf_get(uint16_t port_id,
> struct rte_eth_rss_conf *rss_conf)
>  {
> + struct rte_eth_dev_info dev_info = { 0 };
>   struct rte_eth_dev *dev;
>   int ret;
>  
> @@ -4725,6 +4744,18 @@ rte_eth_dev_rss_hash_conf_get(uint16_t port_id,
>   return -EINVAL;
>   }
>  
> + ret = rte_eth_dev_info_get(port_id, &dev_info);
> + if (ret != 0)
> + return ret;
> +
> + if (rss_conf->rss_key != NULL &&
> + rss_conf->rss_key_len < dev_info.hash_key_size) {
> + RTE_ETHDEV_LOG(ERR,
> + "Ethdev port_id=%u invalid RSS key len: %u, should not 
> be less than: %u\n",
> + port_id, rss_conf->rss_key_len, dev_info.hash_key_size);
> + return -EINVAL;
> + }
> +
>   if (*dev->dev_ops->rss_hash_conf_get == NULL)
>   return -ENOTSUP;
>   ret = eth_err(port_id, (*dev->dev_ops->rss_hash_conf_get)(dev,



Re: [PATCH v8 03/10] ethdev: support setting and querying RSS algorithm

2023-11-01 Thread Ferruh Yigit
On 11/1/2023 7:40 AM, Jie Hai wrote:
> Currently, rte_eth_rss_conf supports configuring and querying
> RSS hash functions, rss key and it's length, but not RSS hash
> algorithm.
> 
> The structure ``rte_eth_dev_info`` is extended by adding a new
> field "rss_algo_capa". Drivers are responsible for reporting this
> capa and configurations of RSS hash algorithm can be verified based
> on the capability. The default value of "rss_algo_capa" is
> RTE_ETH_HASH_ALGO_CAPA_MASK(DEFAULT) if drivers do not report it.
> 
> The structure ``rte_eth_rss_conf`` is extended by adding a new
> field "algorithm". This represents the RSS algorithms to apply.
> If the value of "algorithm" used for configuration is a gibberish
> value, drivers should report the error.
> 
> To check whether the drivers report valid "algorithm", it is set
> to default value before querying in rte_eth_dev_rss_hash_conf_get().
> 
> Signed-off-by: Jie Hai 
> Signed-off-by: Dongdong Liu 
> Acked-by: Huisong Li 
> ---
>  doc/guides/rel_notes/release_23_11.rst |  5 +
>  lib/ethdev/rte_ethdev.c| 26 +++
>  lib/ethdev/rte_ethdev.h| 29 ++
>  lib/ethdev/rte_flow.c  |  1 -
>  lib/ethdev/rte_flow.h  | 26 ++-
>  5 files changed, 62 insertions(+), 25 deletions(-)
> 
> diff --git a/doc/guides/rel_notes/release_23_11.rst 
> b/doc/guides/rel_notes/release_23_11.rst
> index 95db98d098d8..e207786044f9 100644
> --- a/doc/guides/rel_notes/release_23_11.rst
> +++ b/doc/guides/rel_notes/release_23_11.rst
> @@ -372,6 +372,11 @@ ABI Changes
>  * security: struct ``rte_security_ipsec_sa_options`` was updated
>due to inline out-of-place feature addition.
>  
> +* ethdev: Added "rss_algo_capa" field to ``rte_eth_dev_info`` structure for
> +* reporting RSS hash algorithm capability.
> +
> +* ethdev: Added "algorithm" field to ``rte_eth_rss_conf`` structure for RSS
> +  hash algorithm.
>  

As well as ABI change, can you also update the "New Features", to
document getting hash algorithm capability and setting hash algorithm
support added?

Also please add an empty line here.

>  Known Issues
>  
> diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
> index 07bb35833ba6..f9bd99d07eb1 100644
> --- a/lib/ethdev/rte_ethdev.c
> +++ b/lib/ethdev/rte_ethdev.c
> @@ -1269,6 +1269,7 @@ int
>  rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
> const struct rte_eth_conf *dev_conf)
>  {
> + enum rte_eth_hash_function algorithm;
>   struct rte_eth_dev *dev;
>   struct rte_eth_dev_info dev_info;
>   struct rte_eth_conf orig_conf;
> @@ -1510,6 +1511,18 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t 
> nb_rx_q, uint16_t nb_tx_q,
>   goto rollback;
>   }
>  
> + algorithm = dev_conf->rx_adv_conf.rss_conf.algorithm;
> + if (RTE_ETH_HASH_ALGO_TO_CAPA(algorithm) == 0 ||
>

"RTE_ETH_HASH_ALGO_TO_CAPA(algorithm)" can't be zero for valid "enum
rte_eth_hash_function" values, I assume above check is for the case
algorith > 31, as it will result zero.
My concern is, this is undefined behaviour (shift left >= 32) and some
compiler can complain about it, instead of relying this can you please
add explicit "0 <= algorithm < 32" check?





Re: [PATCH v8 10/10] app/testpmd: add RSS hash algorithms display

2023-11-01 Thread Ferruh Yigit
On 11/1/2023 7:40 AM, Jie Hai wrote:
> Add the command "show port X rss-hash algorithm" to display
> the RSS hash algorithms of port X. An example is shown:
> 
> testpmd> show port 0 rss-hash algorithm
> RSS algorithm:
>   toeplitz
> 
> Signed-off-by: Jie Hai 
> Acked-by: Huisong Li 
> ---
>  app/test-pmd/cmdline.c | 29 -
>  app/test-pmd/config.c  | 29 ++---
>  app/test-pmd/testpmd.h |  2 +-
>

Can you please update testpmd documentation,
'doc/guides/testpmd_app_ug/testpmd_funcs.rst', too?

<...>

> diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
> index b9fdb7e8f162..23fb4f8aa781 100644
> --- a/app/test-pmd/config.c
> +++ b/app/test-pmd/config.c
> @@ -1504,24 +1504,7 @@ rss_config_display(struct rte_flow_action_rss 
> *rss_conf)
>   printf(" %d", rss_conf->queue[i]);
>   printf("\n");
>  
> - printf(" function: ");
> - switch (rss_conf->func) {
> - case RTE_ETH_HASH_FUNCTION_DEFAULT:
> - printf("default\n");
> - break;
> - case RTE_ETH_HASH_FUNCTION_TOEPLITZ:
> - printf("toeplitz\n");
> - break;
> - case RTE_ETH_HASH_FUNCTION_SIMPLE_XOR:
> - printf("simple_xor\n");
> - break;
> - case RTE_ETH_HASH_FUNCTION_SYMMETRIC_TOEPLITZ:
> - printf("symmetric_toeplitz\n");
> - break;
> - default:
> - printf("Unknown function\n");
> - return;
> - }
> + printf(" function: %s\n", rte_eth_dev_rss_algo_name(rss_conf->func));
>  


Above modification can be moved to the patch that adds
'rte_eth_dev_rss_algo_name()'.



RE: [PATCH] test/dma: fix for buffer auto free

2023-11-01 Thread Hemant Agrawal
> diff --git a/app/test/test_dmadev.c b/app/test/test_dmadev.c index
> 216f84b6bb..3d4cb37ee6 100644
> --- a/app/test/test_dmadev.c
> +++ b/app/test/test_dmadev.c
> @@ -49,6 +49,8 @@ struct dma_add_test dma_add_test[] = {
>   [TEST_M2D_AUTO_FREE] = {.name = "m2d_auto_free", .enabled =
> false},  };
> 
> +static bool dev_init;

[Hemant]  should't it be per device id? 

> +
>  static void
>  __rte_format_printf(3, 4)
>  print_err(const char *func, int lineno, const char *format, ...) @@ -837,7
> +839,6 @@ test_m2d_auto_free(int16_t dev_id, uint16_t vchan)
>   };
>   uint32_t buf_cnt1, buf_cnt2;
>   struct rte_mempool_ops *ops;
> - static bool dev_init;
>   uint16_t nb_done = 0;
>   bool dma_err = false;
>   int retry = 100;
> @@ -1011,6 +1012,7 @@ test_dmadev_instance(int16_t dev_id)
> 
>   if ((info.dev_capa & RTE_DMA_CAPA_M2D_AUTO_FREE) &&
>   dma_add_test[TEST_M2D_AUTO_FREE].enabled == true) {
> + dev_init = false;
>   if (runtest("m2d_auto_free", test_m2d_auto_free, 128,
> dev_id, vchan,
>   CHECK_ERRS) < 0)
>   goto err;
> --
> 2.25.1



[PATCH] net/mlx5: replenish MPRQ buffers for miniCQEs

2023-11-01 Thread Alexander Kozyrev
Keep unzipping if the next CQE is the miniCQE array in
rxq_cq_decompress_v() routine only for non-MPRQ scenario,
MPRQ requires buffer replenishment between the miniCQEs.

Restore the check for the initial compressed CQE for SPRQ
and check that the current CQE is not compressed before
copying it as a possible title CQE.

Signed-off-by: Alexander Kozyrev 
---
 drivers/net/mlx5/mlx5_rxtx_vec.c | 46 
 drivers/net/mlx5/mlx5_rxtx_vec_altivec.h |  6 ++--
 drivers/net/mlx5/mlx5_rxtx_vec_neon.h|  6 ++--
 drivers/net/mlx5/mlx5_rxtx_vec_sse.h |  6 ++--
 4 files changed, 44 insertions(+), 20 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.c b/drivers/net/mlx5/mlx5_rxtx_vec.c
index 2363d7ed27..ea1c497b90 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec.c
@@ -331,6 +331,15 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf 
**pkts,
}
/* At this point, there shouldn't be any remaining packets. */
MLX5_ASSERT(rxq->decompressed == 0);
+   /* Go directly to unzipping in case the first CQE is compressed. */
+   if (rxq->cqe_comp_layout) {
+   ret = check_cqe_iteration(cq, rxq->cqe_n, rxq->cq_ci);
+   if (ret == MLX5_CQE_STATUS_SW_OWN &&
+   (MLX5_CQE_FORMAT(cq->op_own) == MLX5_COMPRESSED)) {
+   comp_idx = 0;
+   goto decompress;
+   }
+   }
/* Process all the CQEs */
nocmp_n = rxq_cq_process_v(rxq, cq, elts, pkts, pkts_n, err, &comp_idx);
/* If no new CQE seen, return without updating cq_db. */
@@ -345,18 +354,23 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf 
**pkts,
rcvd_pkt += nocmp_n;
/* Copy title packet for future compressed sessions. */
if (rxq->cqe_comp_layout) {
-   next = &(*rxq->cqes)[rxq->cq_ci & q_mask];
-   ret = check_cqe_iteration(next, rxq->cqe_n, rxq->cq_ci);
-   if (ret != MLX5_CQE_STATUS_SW_OWN ||
-   MLX5_CQE_FORMAT(next->op_own) == MLX5_COMPRESSED)
-   rte_memcpy(&rxq->title_pkt, elts[nocmp_n - 1],
-  sizeof(struct rte_mbuf));
+   ret = check_cqe_iteration(cq, rxq->cqe_n, rxq->cq_ci);
+   if (ret == MLX5_CQE_STATUS_SW_OWN &&
+   (MLX5_CQE_FORMAT(cq->op_own) != MLX5_COMPRESSED)) {
+   next = &(*rxq->cqes)[rxq->cq_ci & q_mask];
+   ret = check_cqe_iteration(next, rxq->cqe_n, rxq->cq_ci);
+   if (MLX5_CQE_FORMAT(next->op_own) == MLX5_COMPRESSED ||
+   ret != MLX5_CQE_STATUS_SW_OWN)
+   rte_memcpy(&rxq->title_pkt, elts[nocmp_n - 1],
+  sizeof(struct rte_mbuf));
+   }
}
+decompress:
/* Decompress the last CQE if compressed. */
if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP) {
MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
rxq->decompressed = rxq_cq_decompress_v(rxq, &cq[nocmp_n],
-   &elts[nocmp_n]);
+   &elts[nocmp_n], true);
rxq->cq_ci += rxq->decompressed;
/* Return more packets if needed. */
if (nocmp_n < pkts_n) {
@@ -495,18 +509,22 @@ rxq_burst_mprq_v(struct mlx5_rxq_data *rxq, struct 
rte_mbuf **pkts,
rcvd_pkt += cp_pkt;
/* Copy title packet for future compressed sessions. */
if (rxq->cqe_comp_layout) {
-   next = &(*rxq->cqes)[rxq->cq_ci & q_mask];
-   ret = check_cqe_iteration(next, rxq->cqe_n, rxq->cq_ci);
-   if (ret != MLX5_CQE_STATUS_SW_OWN ||
-   MLX5_CQE_FORMAT(next->op_own) == MLX5_COMPRESSED)
-   rte_memcpy(&rxq->title_pkt, elts[nocmp_n - 1],
-  sizeof(struct rte_mbuf));
+   ret = check_cqe_iteration(cq, rxq->cqe_n, rxq->cq_ci);
+   if (ret == MLX5_CQE_STATUS_SW_OWN &&
+   (MLX5_CQE_FORMAT(cq->op_own) != MLX5_COMPRESSED)) {
+   next = &(*rxq->cqes)[rxq->cq_ci & q_mask];
+   ret = check_cqe_iteration(next, rxq->cqe_n, rxq->cq_ci);
+   if (MLX5_CQE_FORMAT(next->op_own) == MLX5_COMPRESSED ||
+   ret != MLX5_CQE_STATUS_SW_OWN)
+   rte_memcpy(&rxq->title_pkt, elts[nocmp_n - 1],
+  sizeof(struct rte_mbuf));
+   }
}
/* Decompress the last CQE if compressed. */
if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP) {
MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
rxq->decompressed = rxq_cq_decompress_v(rxq, &cq[n

[PATCH v2] net/mlx5: replenish MPRQ buffers for miniCQEs

2023-11-01 Thread Alexander Kozyrev
Keep unzipping if the next CQE is the miniCQE array in
rxq_cq_decompress_v() routine only for non-MPRQ scenario,
MPRQ requires buffer replenishment between the miniCQEs.

Restore the check for the initial compressed CQE for SPRQ
and check that the current CQE is not compressed before
copying it as a possible title CQE.

Signed-off-by: Alexander Kozyrev 
---
 drivers/net/mlx5/mlx5_rxtx_vec.c | 56 ++--
 drivers/net/mlx5/mlx5_rxtx_vec_altivec.h |  6 ++-
 drivers/net/mlx5/mlx5_rxtx_vec_neon.h|  6 ++-
 drivers/net/mlx5/mlx5_rxtx_vec_sse.h |  6 ++-
 4 files changed, 54 insertions(+), 20 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.c b/drivers/net/mlx5/mlx5_rxtx_vec.c
index 2363d7ed27..1872bf310c 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec.c
@@ -331,6 +331,15 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf 
**pkts,
}
/* At this point, there shouldn't be any remaining packets. */
MLX5_ASSERT(rxq->decompressed == 0);
+   /* Go directly to unzipping in case the first CQE is compressed. */
+   if (rxq->cqe_comp_layout) {
+   ret = check_cqe_iteration(cq, rxq->cqe_n, rxq->cq_ci);
+   if (ret == MLX5_CQE_STATUS_SW_OWN &&
+   (MLX5_CQE_FORMAT(cq->op_own) == MLX5_COMPRESSED)) {
+   comp_idx = 0;
+   goto decompress;
+   }
+   }
/* Process all the CQEs */
nocmp_n = rxq_cq_process_v(rxq, cq, elts, pkts, pkts_n, err, &comp_idx);
/* If no new CQE seen, return without updating cq_db. */
@@ -345,18 +354,23 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf 
**pkts,
rcvd_pkt += nocmp_n;
/* Copy title packet for future compressed sessions. */
if (rxq->cqe_comp_layout) {
-   next = &(*rxq->cqes)[rxq->cq_ci & q_mask];
-   ret = check_cqe_iteration(next, rxq->cqe_n, rxq->cq_ci);
-   if (ret != MLX5_CQE_STATUS_SW_OWN ||
-   MLX5_CQE_FORMAT(next->op_own) == MLX5_COMPRESSED)
-   rte_memcpy(&rxq->title_pkt, elts[nocmp_n - 1],
-  sizeof(struct rte_mbuf));
+   ret = check_cqe_iteration(cq, rxq->cqe_n, rxq->cq_ci);
+   if (ret == MLX5_CQE_STATUS_SW_OWN &&
+   (MLX5_CQE_FORMAT(cq->op_own) != MLX5_COMPRESSED)) {
+   next = &(*rxq->cqes)[rxq->cq_ci & q_mask];
+   ret = check_cqe_iteration(next, rxq->cqe_n, rxq->cq_ci);
+   if (MLX5_CQE_FORMAT(next->op_own) == MLX5_COMPRESSED ||
+   ret != MLX5_CQE_STATUS_SW_OWN)
+   rte_memcpy(&rxq->title_pkt, elts[nocmp_n - 1],
+  sizeof(struct rte_mbuf));
+   }
}
+decompress:
/* Decompress the last CQE if compressed. */
if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP) {
MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
rxq->decompressed = rxq_cq_decompress_v(rxq, &cq[nocmp_n],
-   &elts[nocmp_n]);
+   &elts[nocmp_n], true);
rxq->cq_ci += rxq->decompressed;
/* Return more packets if needed. */
if (nocmp_n < pkts_n) {
@@ -482,6 +496,15 @@ rxq_burst_mprq_v(struct mlx5_rxq_data *rxq, struct 
rte_mbuf **pkts,
}
/* At this point, there shouldn't be any remaining packets. */
MLX5_ASSERT(rxq->decompressed == 0);
+   /* Go directly to unzipping in case the first CQE is compressed. */
+   if (rxq->cqe_comp_layout) {
+   ret = check_cqe_iteration(cq, rxq->cqe_n, rxq->cq_ci);
+   if (ret == MLX5_CQE_STATUS_SW_OWN &&
+   (MLX5_CQE_FORMAT(cq->op_own) == MLX5_COMPRESSED)) {
+   comp_idx = 0;
+   goto decompress;
+   }
+   }
/* Process all the CQEs */
nocmp_n = rxq_cq_process_v(rxq, cq, elts, pkts, pkts_n, err, &comp_idx);
/* If no new CQE seen, return without updating cq_db. */
@@ -495,18 +518,23 @@ rxq_burst_mprq_v(struct mlx5_rxq_data *rxq, struct 
rte_mbuf **pkts,
rcvd_pkt += cp_pkt;
/* Copy title packet for future compressed sessions. */
if (rxq->cqe_comp_layout) {
-   next = &(*rxq->cqes)[rxq->cq_ci & q_mask];
-   ret = check_cqe_iteration(next, rxq->cqe_n, rxq->cq_ci);
-   if (ret != MLX5_CQE_STATUS_SW_OWN ||
-   MLX5_CQE_FORMAT(next->op_own) == MLX5_COMPRESSED)
-   rte_memcpy(&rxq->title_pkt, elts[nocmp_n - 1],
-  sizeof(struct rte_mbuf));
+   ret = check_cqe_iteration(cq, rxq->cqe_n, rxq->cq_ci);
+   if (ret == MLX5_CQE_STATUS_S

Re: [PATCH v2] net/bonding: fix illegal memory accesses

2023-11-01 Thread Ferruh Yigit
On 11/1/2023 2:19 AM, Chaoyong He wrote:
> From: Long Wu 
> 
> CI found that overrunning array of 32 2-byte elements at
> element index 65535 (byte offset 131071) by dereferencing
> pointer "members + agg_new_idx".
> 
> Coverity issue: 403099
> Fixes: 6d72657 ("net/bonding: add other aggregator modes")
> Cc: danielx.t.mrzyg...@intel.com
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Long Wu 
> Reviewed-by: Chaoyong He 
> Reviewed-by: Peng Zhang 
>

Acked-by: Ferruh Yigit 

Applied to dpdk-next-net/main, thanks.


Re: [PATCH v3 0/3] introduce maximum Rx buffer size

2023-11-01 Thread Stephen Hemminger
On Wed, 1 Nov 2023 10:36:07 +0800
"lihuisong (C)"  wrote:

> > Do we need to report this size? It's a common feature for all PMDs.  
> > It would make sense then to have max_rx_bufsize set to 16K by default
> > in ethdev, and PMD could then raise/lower based on hardware.  
> It is not appropriate to set to 16K by default in ethdev layer.
> Because I don't see any check for the upper bound in some driver, like 
> axgbe, enetc and so on.
> I'm not sure if they have no upper bound.
> And some driver's maximum buffer size is "16384(16K) - 128"
> So it's better to set to UINT32_MAX by default.
> what do you think?

The goal is always giving application a working upper bound, and enforcing
that as much as possible in ethdev layer. It doesnt matter which pattern
does that.  Fortunately, telling application an incorrect answer is not fatal.
If over estimated, application pool would be wasting space.
If under estimated, application will get more fragmented packets.


[PATCH] maintainers: update for mempool library

2023-11-01 Thread Morten Brørup
Add co-maintainer for Memory pool.

Suggested-by: Thomas Monjalon 
Signed-off-by: Morten Brørup 
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 4083658697..8a4e9f0a9e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -379,6 +379,7 @@ T: git://dpdk.org/dpdk
 
 Memory pool
 M: Andrew Rybchenko 
+M: Morten Brørup 
 F: lib/mempool/
 F: drivers/mempool/ring/
 F: doc/guides/prog_guide/mempool_lib.rst
-- 
2.17.1



Re: [PATCH] maintainers: update for mempool library

2023-11-01 Thread Andrew Rybchenko



On November 1, 2023 19:20:29 Morten Brørup  wrote:


Add co-maintainer for Memory pool.

Suggested-by: Thomas Monjalon 
Signed-off-by: Morten Brørup 


Acked-by: Andrew Rybchenko 


---
MAINTAINERS | 1 +
1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 4083658697..8a4e9f0a9e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -379,6 +379,7 @@ T: git://dpdk.org/dpdk

Memory pool
M: Andrew Rybchenko 
+M: Morten Brørup 
F: lib/mempool/
F: drivers/mempool/ring/
F: doc/guides/prog_guide/mempool_lib.rst
--
2.17.1




Re: [PATCH v2 1/2] net/txgbe: add proper memory barriers in Rx

2023-11-01 Thread Ferruh Yigit
On 11/1/2023 3:32 AM, Jiawen Wu wrote:
> Refer to commit 85e46c532bc7 ("net/ixgbe: add proper memory barriers in
> Rx"). Fix the same issue as ixgbe.
> 
> Segmentation fault has been observed while running the
> txgbe_recv_pkts_lro() function to receive packets on the Loongson 3A5000
> processor. It's caused by the out-of-order execution of CPU. So add a
> proper memory barrier to ensure the read ordering be correct.
> 
> We also did the same thing in the txgbe_recv_pkts() function to make the
> rxd data be valid even though we did not find segmentation fault in this
> function.
> 
> Fixes: 0e484278c85f ("net/txgbe: support Rx")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Jiawen Wu 
>

Series applied to dpdk-next-net/main, thanks.


Re: [PATCH] ethdev: fix 32-bit build with GCC-13

2023-11-01 Thread Ferruh Yigit
On 11/1/2023 8:12 AM, Ori Kam wrote:
> Hi
> 
>> -Original Message-
>> From: Ruifeng Wang 
>> Sent: Wednesday, November 1, 2023 9:16 AM
>>
>> aarch32 build with gcc-13.0.1 generated following warning:
>>
>> In function 'memcpy',
>> inlined from 'rte_memcpy' at
>> ../lib/eal/arm/include/rte_memcpy_32.h:296:9,
>> inlined from 'rte_flow_conv_action_conf' at 
>> ../lib/ethdev/rte_flow.c:726:20,
>> inlined from 'rte_flow_conv_actions' at ../lib/ethdev/rte_flow.c:936:10:
>> warning: '__builtin_memcpy' specified bound 4294967264 exceeds maximum
>> object size 2147483647 [-Wstringop-overflow=]
>>
>> The issue is due to possible wrapping in unsigned arithmetic.
>> The 'size' can be 0. 'off' is 32. When 'tmp' is equal to (unsigned)-32,
>> the copy length is more than half the address space. Hence the warning.
>>
>> Casted variables to 64-bit to avoid wrapping.
>>
>> Fixes: 063911ee1df4 ("ethdev: add flow API object converter")
>> Cc: sta...@dpdk.org
>>
>> Reported-by: Luca Boccassi 
>> Signed-off-by: Ruifeng Wang 
>>>
> Acked-by: Ori Kam 
> 

Acked-by: Ferruh Yigit 

Applied to dpdk-next-net/main, thanks.



Re: [PATCH] net/nfp: fix offload flags of the security capabilities

2023-11-01 Thread Ferruh Yigit
On 10/28/2023 7:31 AM, Chaoyong He wrote:
> From: Shihong Wang 
> 
> This patch resolves configuration error of ol_flags in the
> rte_security_capability. Currently ol_flags in the ingress direction
> of the SA, 'RTE_SECURITY_TX_OLOAD_NEED_MDATA' is configured. In fact,
> ol_flags only in the egress direction of the SA needs to be configured.
> 
> Fixes: e6d69ea011c9 ("net/nfp: get security capabilities and session size")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Shihong Wang 
> Reviewed-by: Chaoyong He 
> Reviewed-by: Peng Zhang 
>

Acked-by: Ferruh Yigit 


Re: [PATCH v2 04/11] net/nfp: remove the unneeded data abstraction

2023-11-01 Thread Ferruh Yigit
On 10/28/2023 7:53 AM, Chaoyong He wrote:
> The data structure 'struct nfp_net_adapter' has only one data field and
> we won't extend it in the future, which makes this abstraction unneeded,
> so remove this data structure and the related macro
> 'NFP_NET_DEV_PRIVATE_TO_HW'.
> 

Mentioned abstract struct, 'struct nfp_net_adapter', is not removed in
this patch, although mentioned macro removed.

Since there is not user of the struct after this patch, I guess
intention was to remove the struct, so if there is no other issue I can
remove the struct while merging.


> Signed-off-by: Chaoyong He 
> Reviewed-by: Peng Zhang 



[PATCH v6 0/3] net/tap: build and fix for BPF program

2023-11-01 Thread Stephen Hemminger
Update the documentation and tools to build the BPF program used by
tap device. And apply fix to the RSS algorithm to correctly
handle non-IP protocols.

v6 - cosmetic improvements to extract process
 add better boilerplate and fix python lint warnings

Madhuker Mythri (1):
  net/tap: Fixed RSS algorithm to support fragmented packets

Stephen Hemminger (2):
  net/tap: support infrastructure to build the BPF filter
  net/tap; rebuild and update the BPF flow program

 doc/guides/nics/tap.rst |   11 +-
 drivers/net/tap/bpf/.gitignore  |1 +
 drivers/net/tap/bpf/Makefile|   18 +
 drivers/net/tap/bpf/bpf_api.h   |  275 ++
 drivers/net/tap/bpf/bpf_elf.h   |   53 +
 drivers/net/tap/bpf/bpf_extract.py  |   86 +
 drivers/net/tap/{ => bpf}/tap_bpf_program.c |   57 +-
 drivers/net/tap/tap_bpf_insns.h | 2959 ++-
 drivers/net/tap/tap_rss.h   |2 +-
 9 files changed, 1986 insertions(+), 1476 deletions(-)
 create mode 100644 drivers/net/tap/bpf/.gitignore
 create mode 100644 drivers/net/tap/bpf/Makefile
 create mode 100644 drivers/net/tap/bpf/bpf_api.h
 create mode 100644 drivers/net/tap/bpf/bpf_elf.h
 create mode 100644 drivers/net/tap/bpf/bpf_extract.py
 rename drivers/net/tap/{ => bpf}/tap_bpf_program.c (79%)

-- 
2.41.0



[PATCH v6 1/3] net/tap: support infrastructure to build the BPF filter

2023-11-01 Thread Stephen Hemminger
Move the BPF program related code into a subdirectory.
And add a Makefile for building it.

The code depends on include files from iproute2.
But these are not public headers which iproute2 exports
as a package API. Therefore make a local copy here.

The standalone build was also broken because by
commit ef5baf3486e0 ("replace packed attributes")
which introduced __rte_packed into this code.

Add a python program to extract the resulting BPF into
a format that can be consumed by the TAP driver.

Update the documentation.

Signed-off-by: Stephen Hemminger 
---
 doc/guides/nics/tap.rst |  11 +-
 drivers/net/tap/bpf/.gitignore  |   1 +
 drivers/net/tap/bpf/Makefile|  18 ++
 drivers/net/tap/bpf/bpf_api.h   | 275 
 drivers/net/tap/bpf/bpf_elf.h   |  53 
 drivers/net/tap/bpf/bpf_extract.py  |  86 ++
 drivers/net/tap/{ => bpf}/tap_bpf_program.c |  10 +-
 drivers/net/tap/tap_rss.h   |   2 +-
 8 files changed, 444 insertions(+), 12 deletions(-)
 create mode 100644 drivers/net/tap/bpf/.gitignore
 create mode 100644 drivers/net/tap/bpf/Makefile
 create mode 100644 drivers/net/tap/bpf/bpf_api.h
 create mode 100644 drivers/net/tap/bpf/bpf_elf.h
 create mode 100644 drivers/net/tap/bpf/bpf_extract.py
 rename drivers/net/tap/{ => bpf}/tap_bpf_program.c (96%)

diff --git a/doc/guides/nics/tap.rst b/doc/guides/nics/tap.rst
index 07df0d35a2..449e747994 100644
--- a/doc/guides/nics/tap.rst
+++ b/doc/guides/nics/tap.rst
@@ -256,15 +256,12 @@ C functions under different ELF sections.
 
 2. Install ``LLVM`` library and ``clang`` compiler versions 3.7 and above
 
-3. Compile ``tap_bpf_program.c`` via ``LLVM`` into an object file::
+3. Use make to compile  `tap_bpf_program.c`` via ``LLVM`` into an object file
+   and extract the resulting instructions into ``tap_bpf_insn.h``.
 
-clang -O2 -emit-llvm -c tap_bpf_program.c -o - | llc -march=bpf \
--filetype=obj -o 
+cd bpf; make
 
-
-4. Use a tool that receives two parameters: an eBPF object file and a section
-name, and prints out the section as a C array of eBPF instructions.
-Embed the C array in your TAP PMD tree.
+4. Recompile the TAP PMD.
 
 The C arrays are uploaded to the kernel using BPF system calls.
 
diff --git a/drivers/net/tap/bpf/.gitignore b/drivers/net/tap/bpf/.gitignore
new file mode 100644
index 00..30a258f1af
--- /dev/null
+++ b/drivers/net/tap/bpf/.gitignore
@@ -0,0 +1 @@
+tap_bpf_program.o
diff --git a/drivers/net/tap/bpf/Makefile b/drivers/net/tap/bpf/Makefile
new file mode 100644
index 00..59844c616f
--- /dev/null
+++ b/drivers/net/tap/bpf/Makefile
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# This file is not built as part of normal DPDK build.
+# It is used to generate the eBPF code for TAP RSS.
+CLANG=clang
+CLANG_OPTS=-O2
+TARGET=../tap_bpf_insns.h
+
+all: $(TARGET)
+
+clean:
+   rm tap_bpf_program.o $(TARGET)
+
+tap_bpf_program.o: tap_bpf_program.c
+   $(CLANG) $(CLANG_OPTS) -emit-llvm -c $< -o - | \
+   llc -march=bpf -filetype=obj -o $@
+
+$(TARGET): tap_bpf_program.o
+   python3 bpf_extract.py -stap_bpf_program.c -o $@ $<
diff --git a/drivers/net/tap/bpf/bpf_api.h b/drivers/net/tap/bpf/bpf_api.h
new file mode 100644
index 00..5887d3a851
--- /dev/null
+++ b/drivers/net/tap/bpf/bpf_api.h
@@ -0,0 +1,275 @@
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+#ifndef __BPF_API__
+#define __BPF_API__
+
+/* Note:
+ *
+ * This file can be included into eBPF kernel programs. It contains
+ * a couple of useful helper functions, map/section ABI (bpf_elf.h),
+ * misc macros and some eBPF specific LLVM built-ins.
+ */
+
+#include 
+
+#include 
+#include 
+#include 
+
+#include 
+
+#include "bpf_elf.h"
+
+/** libbpf pin type. */
+enum libbpf_pin_type {
+   LIBBPF_PIN_NONE,
+   /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
+   LIBBPF_PIN_BY_NAME,
+};
+
+/** Type helper macros. */
+
+#define __uint(name, val) int (*name)[val]
+#define __type(name, val) typeof(val) *name
+#define __array(name, val) typeof(val) *name[]
+
+/** Misc macros. */
+
+#ifndef __stringify
+# define __stringify(X)#X
+#endif
+
+#ifndef __maybe_unused
+# define __maybe_unused__attribute__((__unused__))
+#endif
+
+#ifndef offsetof
+# define offsetof(TYPE, MEMBER)__builtin_offsetof(TYPE, MEMBER)
+#endif
+
+#ifndef likely
+# define likely(X) __builtin_expect(!!(X), 1)
+#endif
+
+#ifndef unlikely
+# define unlikely(X)   __builtin_expect(!!(X), 0)
+#endif
+
+#ifndef htons
+# define htons(X)  __constant_htons((X))
+#endif
+
+#ifndef ntohs
+# define ntohs(X)  __constant_ntohs((X))
+#endif
+
+#ifndef htonl
+# define htonl(X)  __constant_htonl((X))
+#endif
+
+#ifndef ntohl
+# define ntohl(X)  __constant_ntohl((X))
+#endif
+
+#ifndef __inline__
+# define __inline____at

[PATCH v6 2/3] net/tap: Fixed RSS algorithm to support fragmented packets

2023-11-01 Thread Stephen Hemminger
From: Madhuker Mythri 

As per analysis on Tap PMD, the existing RSS algorithm considering
4-tuple(Src-IP, Dst-IP, Src-port and Dst-port) and identification of
fragment packets is not done, thus we are seeing all the fragmented
chunks of single packet differs in RSS hash value and distributed across
multiple queues.
The RSS algorithm assumes that, all the incoming IP packets are based on
L4-protocol(UDP/TCP) and trying to fetch the L4 fields(Src-port and
Dst-port) for each incoming packet, but for the fragmented chunks these
L4-header will not be present(except for first packet) and should not
consider in RSS hash for L4 header fields in-case of fragmented chunks.
Which is a bug in the RSS algorithm implemented in the BPF functionality
under TAP PMD.

So, modified the RSS eBPF C-program and generated the structure of
C-array in the 'tap_bpf_insns.h' file, which is in eBPF byte-code
instructions format.

Bugzilla Id: 870

Signed-off-by: Madhuker Mythri 
Signed-off-by: Stephen Hemminger 
---
 drivers/net/tap/bpf/tap_bpf_program.c | 47 ++-
 1 file changed, 39 insertions(+), 8 deletions(-)

diff --git a/drivers/net/tap/bpf/tap_bpf_program.c 
b/drivers/net/tap/bpf/tap_bpf_program.c
index d65021d8a1..369c7b107f 100644
--- a/drivers/net/tap/bpf/tap_bpf_program.c
+++ b/drivers/net/tap/bpf/tap_bpf_program.c
@@ -19,6 +19,8 @@
 #include "bpf_elf.h"
 #include "../tap_rss.h"
 
+#include "bpf_api.h"
+
 /** Create IPv4 address */
 #define IPv4(a, b, c, d) ((__u32)(((a) & 0xff) << 24) | \
(((b) & 0xff) << 16) | \
@@ -133,6 +135,8 @@ rss_l3_l4(struct __sk_buff *skb)
__u8 *key = 0;
__u32 len;
__u32 queue = 0;
+   bool mf = 0;
+   __u16 frag_off = 0;
 
rsskey = map_lookup_elem(&map_keys, &key_idx);
if (!rsskey) {
@@ -157,6 +161,8 @@ rss_l3_l4(struct __sk_buff *skb)
return TC_ACT_OK;
 
__u8 *src_dst_addr = data + off + offsetof(struct iphdr, saddr);
+   __u8 *frag_off_addr = data + off + offsetof(struct iphdr, 
frag_off);
+   __u8 *prot_addr = data + off + offsetof(struct iphdr, protocol);
__u8 *src_dst_port = data + off + sizeof(struct iphdr);
struct ipv4_l3_l4_tuple v4_tuple = {
.src_addr = IPv4(*(src_dst_addr + 0),
@@ -167,11 +173,25 @@ rss_l3_l4(struct __sk_buff *skb)
*(src_dst_addr + 5),
*(src_dst_addr + 6),
*(src_dst_addr + 7)),
-   .sport = PORT(*(src_dst_port + 0),
-   *(src_dst_port + 1)),
-   .dport = PORT(*(src_dst_port + 2),
-   *(src_dst_port + 3)),
+   .sport = 0,
+   .dport = 0,
};
+   /** Fetch the L4-payer port numbers only in-case of TCP/UDP
+** and also if the packet is not fragmented. Since fragmented
+** chunks do not have L4 TCP/UDP header.
+**/
+   if (*prot_addr == IPPROTO_UDP || *prot_addr == IPPROTO_TCP) {
+   frag_off = PORT(*(frag_off_addr + 0),
+   *(frag_off_addr + 1));
+   mf = frag_off & 0x2000;
+   frag_off = frag_off & 0x1fff;
+   if (mf == 0 && frag_off == 0) {
+   v4_tuple.sport = PORT(*(src_dst_port + 0),
+   *(src_dst_port + 1));
+   v4_tuple.dport = PORT(*(src_dst_port + 2),
+   *(src_dst_port + 3));
+   }
+   }
__u8 input_len = sizeof(v4_tuple) / sizeof(__u32);
if (rsskey->hash_fields & (1 << HASH_FIELD_IPV4_L3))
input_len--;
@@ -184,6 +204,9 @@ rss_l3_l4(struct __sk_buff *skb)
offsetof(struct ipv6hdr, saddr);
__u8 *src_dst_port = data + off +
sizeof(struct ipv6hdr);
+   __u8 *next_hdr = data + off +
+   offsetof(struct ipv6hdr, nexthdr);
+
struct ipv6_l3_l4_tuple v6_tuple;
for (j = 0; j < 4; j++)
*((uint32_t *)&v6_tuple.src_addr + j) =
@@ -193,10 +216,18 @@ rss_l3_l4(struct __sk_buff *skb)
*((uint32_t *)&v6_tuple.dst_addr + j) =
__builtin_bswap32(*((uint32_t *)
src_dst_addr + 4 + j));
-   v6_tuple.sport = PORT(*(src_dst_port + 0),
- *(src_dst_port + 1));
-   v6_tuple.dport = PORT(*(src_dst_port + 2),
- *(src_ds

[PATCH v6 3/3] net/tap; rebuild and update the BPF flow program

2023-11-01 Thread Stephen Hemminger
Rebuild with commit
c0335cc197 (tap; rebuild and update the BPF flow program, 2023-10-31)

Signed-off-by: Stephen Hemminger 
---
 drivers/net/tap/tap_bpf_insns.h | 2959 ---
 1 file changed, 1503 insertions(+), 1456 deletions(-)

diff --git a/drivers/net/tap/tap_bpf_insns.h b/drivers/net/tap/tap_bpf_insns.h
index 1a91bbad13..53fa76c4e6 100644
--- a/drivers/net/tap/tap_bpf_insns.h
+++ b/drivers/net/tap/tap_bpf_insns.h
@@ -1,10 +1,10 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright 2017 Mellanox Technologies, Ltd
+ * Auto-generated from tap_bpf_program.c
+ * This not the original source file. Do NOT edit it.
  */
 
 #include 
 
-/* bpf_insn array matching cls_q section. See tap_bpf_program.c file */
 static struct bpf_insn cls_q_insns[] = {
{0x61,2,1,   52, 0x},
{0x18,3,0,0, 0xdeadbeef},
@@ -23,18 +23,17 @@ static struct bpf_insn cls_q_insns[] = {
{0x95,0,0,0, 0x},
 };
 
-/* bpf_insn array matching l3_l4 section. see tap_bpf_program.c file */
 static struct bpf_insn l3_l4_hash_insns[] = {
{0xbf,7,1,0, 0x},
-   {0x61,8,7,   16, 0x},
-   {0x61,6,7,   76, 0x},
+   {0x61,6,7,   16, 0x},
+   {0x61,8,7,   76, 0x},
{0x61,9,7,   80, 0x},
{0x18,1,0,0, 0xdeadbeef},
{0x00,0,0,0, 0x},
{0x63,   10,1,   -4, 0x},
{0xbf,2,   10,0, 0x},
{0x07,2,0,0, 0xfffc},
-   {0x18,1,1,0, 0xcafe},
+   {0x18,1,0,0, 0x},
{0x00,0,0,0, 0x},
{0x85,0,0,0, 0x0001},
{0x55,0,0,   21, 0x},
@@ -58,7 +57,7 @@ static struct bpf_insn l3_l4_hash_insns[] = {
{0x07,1,0,0, 0xffd0},
{0xb7,2,0,0, 0x0023},
{0x85,0,0,0, 0x0006},
-   {0x05,0,0, 1632, 0x},
+   {0x05,0,0, 1680, 0x},
{0xb7,1,0,0, 0x000e},
{0x61,2,7,   20, 0x},
{0x15,2,0,   10, 0x},
@@ -66,1630 +65,1678 @@ static struct bpf_insn l3_l4_hash_insns[] = {
{0x55,2,0,8, 0xa888},
{0xbf,2,7,0, 0x},
{0xb7,7,0,0, 0x},
-   {0xbf,1,6,0, 0x},
+   {0xbf,1,8,0, 0x},
{0x07,1,0,0, 0x0012},
-   {0x2d,1,9, 1622, 0x},
+   {0x2d,1,9, 1670, 0x},
{0xb7,1,0,0, 0x0012},
-   {0x69,8,6,   16, 0x},
+   {0x69,6,8,   16, 0x},
{0xbf,7,2,0, 0x},
+   {0x57,6,0,0, 0x},
{0x7b,   10,7,  -56, 0x},
-   {0x57,8,0,0, 0x},
-   {0x15,8,0,  409, 0xdd86},
+   {0x15,6,0,  443, 0xdd86},
{0xb7,7,0,0, 0x0003},
-   {0x55,8,0, 1614, 0x0008},
-   {0x0f,6,1,0, 0x},
+   {0x55,6,0, 1662, 0x0008},
+   {0x0f,8,1,0, 0x},
{0xb7,7,0,0, 0x},
-   {0xbf,1,6,0, 0x},
+   {0xbf,1,8,0, 0x},
{0x07,1,0,0, 0x0018},
-   {0x2d,1,9, 1609, 0x},
-   {0x71,3,6,   12, 0x},
-   {0xbf,1,3,0, 0x},
-   {0x67,1,0,0, 0x0038},
-   {0xc7,1,0,0, 0x0020},
-   {0x77,1,0,0, 0x001f},
-   {0x57,1,0,0, 0x2cc681d1},
-   {0x67,3,0,0, 0x0018},
+   {0x2d,1,9, 1657, 0x},
+   {0xb7,1,0,0, 0x},
+   {0x71,3,8,   12, 0x},
+   {0x71,2,8,9, 0x},
+   {0x15,2,0,1, 0x0011},
+   {0x55,2,0,   21, 0x0006},
+   {0x71,2,8,7, 0x},
+   {0x71,4,8,6, 0x},
+   {0xbf,5,4,0, 0x},
+   {0x67,5,0,0, 0x0008},
+   {0x57,5,0,0, 0x1f00},
+   {0x4f,5,2,0, 0x},
+   {0x57,4,0,0, 0x0020},
+   {0x4f,4,5,0, 0x},
+   {0x55,4,0,   12, 0x},
+   {0xbf,2,8,0, 0x},
+   {0x07,2,0,0, 0x000

[PATCH v5 0/4] add pointer compression API

2023-11-01 Thread Paul Szczepanek
This patchset is proposing adding a new EAL header with utility functions
that allow compression of arrays of pointers.

When passing caches full of pointers between threads, memory containing
the pointers is copied multiple times which is especially costly between
cores. A compression method will allow us to shrink the memory size
copied.

The compression takes advantage of the fact that pointers are usually
located in a limited memory region (like a mempool). We can compress them
by converting them to offsets from a base memory address.

Offsets can be stored in fewer bytes (dictated by the memory region size
and alignment of the pointer). For example: an 8 byte aligned pointer
which is part of a 32GB memory pool can be stored in 4 bytes. The API is
very generic and does not assume mempool pointers, any pointer can be
passed in.

Compression is based on few and fast operations and especially with vector
instructions leveraged creates minimal overhead.

The API accepts and returns arrays because the overhead means it only is
worth it when done in bulk.

Test is added that shows potential performance gain from compression. In
this test an array of pointers is passed through a ring between two cores.
It shows the gain which is dependent on the bulk operation size. In this
synthetic test run on ampere altra a substantial (up to 25%) performance
gain is seen if done in bulk size larger than 32. At 32 it breaks even and
lower sizes create a small (less than 5%) slowdown due to overhead.

In a more realistic mock application running the l3 forwarding dpdk
example that works in pipeline mode on two cores this translated into a
~5% throughput increase on an ampere altra.

v2:
* addressed review comments (style, explanations and typos)
* lowered bulk iterations closer to original numbers to keep runtime short
* fixed pointer size warning on 32-bit arch
v3:
* added 16-bit versions of compression functions and tests
* added documentation of these new utility functions in the EAL guide
v4:
* added unit test
* fix bug in NEON implementation of 32-bit decompress
v5:
* disable NEON and SVE implementation on AARCH32 due to wrong pointer size

Paul Szczepanek (4):
  eal: add pointer compression functions
  test: add pointer compress tests to ring perf test
  docs: add pointer compression to the EAL guide
  test: add unit test for ptr compression

 .mailmap  |   1 +
 app/test/meson.build  |   1 +
 app/test/test_eal_ptr_compress.c  | 108 ++
 app/test/test_ring.h  |  94 -
 app/test/test_ring_perf.c | 354 --
 .../prog_guide/env_abstraction_layer.rst  | 142 +++
 lib/eal/include/meson.build   |   1 +
 lib/eal/include/rte_ptr_compress.h| 266 +
 8 files changed, 843 insertions(+), 124 deletions(-)
 create mode 100644 app/test/test_eal_ptr_compress.c
 create mode 100644 lib/eal/include/rte_ptr_compress.h

--
2.25.1



[PATCH v5 1/4] eal: add pointer compression functions

2023-11-01 Thread Paul Szczepanek
Add a new utility header for compressing pointers. The provided
functions can store pointers in 32-bit offsets.

The compression takes advantage of the fact that pointers are
usually located in a limited memory region (like a mempool).
We can compress them by converting them to offsets from a base
memory address. Offsets can be stored in fewer bytes (dictated
by the memory region size and alignment of the pointer).
For example: an 8 byte aligned pointer which is part of a 32GB
memory pool can be stored in 4 bytes.

Suggested-by: Honnappa Nagarahalli 
Signed-off-by: Paul Szczepanek 
Signed-off-by: Kamalakshitha Aligeri 
Reviewed-by: Honnappa Nagarahalli 
---
 .mailmap   |   1 +
 lib/eal/include/meson.build|   1 +
 lib/eal/include/rte_ptr_compress.h | 266 +
 3 files changed, 268 insertions(+)
 create mode 100644 lib/eal/include/rte_ptr_compress.h

diff --git a/.mailmap b/.mailmap
index 3f5bab26a8..004751d27a 100644
--- a/.mailmap
+++ b/.mailmap
@@ -1069,6 +1069,7 @@ Paul Greenwalt 
 Paulis Gributs 
 Paul Luse 
 Paul M Stillwell Jr 
+Paul Szczepanek 
 Pavan Kumar Linga 
 Pavan Nikhilesh  
 Pavel Belous 
diff --git a/lib/eal/include/meson.build b/lib/eal/include/meson.build
index e94b056d46..ce2c733633 100644
--- a/lib/eal/include/meson.build
+++ b/lib/eal/include/meson.build
@@ -36,6 +36,7 @@ headers += files(
 'rte_pci_dev_features.h',
 'rte_per_lcore.h',
 'rte_pflock.h',
+   'rte_ptr_compress.h',
 'rte_random.h',
 'rte_reciprocal.h',
 'rte_seqcount.h',
diff --git a/lib/eal/include/rte_ptr_compress.h 
b/lib/eal/include/rte_ptr_compress.h
new file mode 100644
index 00..47a72e4213
--- /dev/null
+++ b/lib/eal/include/rte_ptr_compress.h
@@ -0,0 +1,266 @@
+/* SPDX-License-Identifier: BSD-shift-Clause
+ * Copyright(c) 2023 Arm Limited
+ */
+
+#ifndef RTE_PTR_COMPRESS_H
+#define RTE_PTR_COMPRESS_H
+
+/**
+ * @file
+ * Pointer compression and decompression functions.
+ *
+ * When passing arrays full of pointers between threads, memory containing
+ * the pointers is copied multiple times which is especially costly between
+ * cores. These functions allow us to compress the pointers.
+ *
+ * Compression takes advantage of the fact that pointers are usually located in
+ * a limited memory region (like a mempool). We compress them by converting 
them
+ * to offsets from a base memory address. Offsets can be stored in fewer bytes.
+ *
+ * The compression functions come in two varieties: 32-bit and 16-bit.
+ *
+ * To determine how many bits are needed to compress the pointer calculate
+ * the biggest offset possible (highest value pointer - base pointer)
+ * and shift the value right according to alignment (shift by exponent of the
+ * power of 2 of alignment: aligned by 4 - shift by 2, aligned by 8 - shift by
+ * 3, etc.). The resulting value must fit in either 32 or 16 bits.
+ *
+ * For usage example and further explanation please see "Pointer Compression" 
in
+ * doc/guides/prog_guide/env_abstraction_layer.rst
+ */
+
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Compress pointers into 32-bit offsets from base pointer.
+ *
+ * @note It is programmer's responsibility to ensure the resulting offsets fit
+ * into 32 bits. Alignment of the structures pointed to by the pointers allows
+ * us to drop bits from the offsets. This is controlled by the bit_shift
+ * parameter. This means that if structures are aligned by 8 bytes they must be
+ * within 32GB of the base pointer. If there is no such alignment guarantee 
they
+ * must be within 4GB.
+ *
+ * @param ptr_base
+ *   A pointer used to calculate offsets of pointers in src_table.
+ * @param src_table
+ *   A pointer to an array of pointers.
+ * @param dest_table
+ *   A pointer to an array of compressed pointers returned by this function.
+ * @param n
+ *   The number of objects to compress, must be strictly positive.
+ * @param bit_shift
+ *   Byte alignment of memory pointed to by the pointers allows for
+ *   bits to be dropped from the offset and hence widen the memory region that
+ *   can be covered. This controls how many bits are right shifted.
+ **/
+static __rte_always_inline void
+rte_ptr_compress_32(void *ptr_base, void **src_table,
+   uint32_t *dest_table, unsigned int n, unsigned int bit_shift)
+{
+   unsigned int i = 0;
+#if defined RTE_HAS_SVE_ACLE && !defined RTE_ARCH_ARMv8_AARCH32
+   svuint64_t v_ptr_table;
+   svbool_t pg = svwhilelt_b64(i, n);
+   do {
+   v_ptr_table = svld1_u64(pg, (uint64_t *)src_table + i);
+   v_ptr_table = svsub_x(pg, v_ptr_table, (uint64_t)ptr_base);
+   v_ptr_table = svlsr_x(pg, v_ptr_table, bit_shift);
+   svst1w(pg, &dest_table[i], v_ptr_table);
+   i += svcntd();
+   pg = svwhilelt_b64(i, n);
+   } while (svptest_any(svptrue_b64

[PATCH v5 3/4] docs: add pointer compression to the EAL guide

2023-11-01 Thread Paul Szczepanek
Documentation added in the EAL guide for the new
utility functions for pointer compression
showing example code and potential usecases.

Signed-off-by: Paul Szczepanek 
Reviewed-by: Honnappa Nagarahalli 
---
 .../prog_guide/env_abstraction_layer.rst  | 142 ++
 1 file changed, 142 insertions(+)

diff --git a/doc/guides/prog_guide/env_abstraction_layer.rst 
b/doc/guides/prog_guide/env_abstraction_layer.rst
index 6debf54efb..f04d032442 100644
--- a/doc/guides/prog_guide/env_abstraction_layer.rst
+++ b/doc/guides/prog_guide/env_abstraction_layer.rst
@@ -1192,3 +1192,145 @@ will not be deallocated.

 Any successful deallocation event will trigger a callback, for which user
 applications and other DPDK subsystems can register.
+
+.. _pointer_compression:
+
+Pointer Compression
+---
+
+Use ``rte_ptr_compress_16()`` and ``rte_ptr_decompress_16()`` to compress and
+decompress pointers into 16-bit offsets. Use ``rte_ptr_compress_32()`` and
+``rte_ptr_decompress_32()`` to compress and decompress pointers into 32-bit
+offsets.
+
+Compression takes advantage of the fact that pointers are usually located in a
+limited memory region (like a mempool). By converting them to offsets from a
+base memory address they can be stored in fewer bytes. How many bytes are 
needed
+to store the offset is dictated by the memory region size and alignment of
+objects the pointers point to.
+
+For example, a pointer which is part of a 4GB memory pool can be stored as 32
+bit offset. If the pointer points to memory that is 8 bytes aligned then 3 bits
+can be dropped from the offset and a 32GB memory pool can now fit in 32 bits.
+
+For performance reasons these requirements are not enforced programmatically.
+The programmer is responsible for ensuring that the combination of distance
+from the base pointer and memory alignment allow for storing of the offset in
+the number of bits indicated by the function name (16 or 32). Start of mempool
+memory would be a good candidate for the base pointer. Otherwise any pointer
+that precedes all pointers, is close enough and has the same alignment as the
+pointers being compressed will work.
+
+.. note::
+
+Performance gains depend on the batch size of pointers and CPU capabilities
+such as vector extensions. It's important to measure the performance
+increase on target hardware. A test called ``ring_perf_autotest`` in
+``dpdk-test`` can provide the measurements.
+
+Example usage
+~
+
+In this example we send pointers between two cores through a ring. While this
+is a realistic use case the code is simplified for demonstration purposes and
+does not have error handling.
+
+.. code-block:: c
+
+#include 
+#include 
+#include 
+#include 
+
+#define ITEMS_ARRAY_SIZE (1024)
+#define BATCH_SIZE (128)
+#define ALIGN_EXPONENT (3)
+#define ITEM_ALIGN (1<

[PATCH v5 2/4] test: add pointer compress tests to ring perf test

2023-11-01 Thread Paul Szczepanek
Add a test that runs a zero copy burst enqueue and dequeue on a ring
of raw pointers and compressed pointers at different burst sizes to
showcase performance benefits of newly added pointer compression APIs.

Refactored threading code to pass more parameters to threads to
reuse existing code. Added more bulk sizes to showcase their effects
on compression. Adjusted loop iteration numbers to take into account
bulk sizes to keep runtime constant (instead of number of operations).

Adjusted old printfs to match new ones which have aligned numbers.

Signed-off-by: Paul Szczepanek 
Reviewed-by: Honnappa Nagarahalli 
---
 app/test/test_ring.h  |  94 +-
 app/test/test_ring_perf.c | 354 +-
 2 files changed, 324 insertions(+), 124 deletions(-)

diff --git a/app/test/test_ring.h b/app/test/test_ring.h
index 45c263f3ff..3b00f2465d 100644
--- a/app/test/test_ring.h
+++ b/app/test/test_ring.h
@@ -1,10 +1,12 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2019 Arm Limited
+ * Copyright(c) 2019-2023 Arm Limited
  */

 #include 
 #include 
 #include 
+#include 
+#include 

 /* API type to call
  * rte_ring__enqueue_
@@ -25,6 +27,10 @@
 #define TEST_RING_ELEM_BULK 16
 #define TEST_RING_ELEM_BURST 32

+#define TEST_RING_ELEM_BURST_ZC 64
+#define TEST_RING_ELEM_BURST_ZC_COMPRESS_PTR_16 128
+#define TEST_RING_ELEM_BURST_ZC_COMPRESS_PTR_32 256
+
 #define TEST_RING_IGNORE_API_TYPE ~0U

 /* This function is placed here as it is required for both
@@ -101,6 +107,9 @@ static inline unsigned int
 test_ring_enqueue(struct rte_ring *r, void **obj, int esize, unsigned int n,
unsigned int api_type)
 {
+   unsigned int ret;
+   struct rte_ring_zc_data zcd = {0};
+
/* Legacy queue APIs? */
if (esize == -1)
switch (api_type) {
@@ -152,6 +161,46 @@ test_ring_enqueue(struct rte_ring *r, void **obj, int 
esize, unsigned int n,
case (TEST_RING_THREAD_MPMC | TEST_RING_ELEM_BURST):
return rte_ring_mp_enqueue_burst_elem(r, obj, esize, n,
NULL);
+   case (TEST_RING_ELEM_BURST_ZC):
+   ret = rte_ring_enqueue_zc_burst_elem_start(
+   r, esize, n, &zcd, NULL);
+   if (unlikely(ret == 0))
+   return 0;
+   rte_memcpy(zcd.ptr1, (char *)obj, zcd.n1 * esize);
+   if (unlikely(zcd.ptr2 != NULL))
+   rte_memcpy(zcd.ptr2,
+   (char *)obj + zcd.n1 * esize,
+   (ret - zcd.n1) * esize);
+   rte_ring_enqueue_zc_finish(r, ret);
+   return ret;
+   case (TEST_RING_ELEM_BURST_ZC_COMPRESS_PTR_16):
+   /* rings cannot store uint16_t so we use a uint32_t
+* and half the requested number of elements
+* and compensate by doubling the returned numbers
+*/
+   ret = rte_ring_enqueue_zc_burst_elem_start(
+   r, sizeof(uint32_t), n / 2, &zcd, NULL);
+   if (unlikely(ret == 0))
+   return 0;
+   rte_ptr_compress_16(0, obj, zcd.ptr1, zcd.n1 * 2, 3);
+   if (unlikely(zcd.ptr2 != NULL))
+   rte_ptr_compress_16(0,
+   obj + (zcd.n1 * 2),
+   zcd.ptr2,
+   (ret - zcd.n1) * 2, 3);
+   rte_ring_enqueue_zc_finish(r, ret);
+   return ret * 2;
+   case (TEST_RING_ELEM_BURST_ZC_COMPRESS_PTR_32):
+   ret = rte_ring_enqueue_zc_burst_elem_start(
+   r, sizeof(uint32_t), n, &zcd, NULL);
+   if (unlikely(ret == 0))
+   return 0;
+   rte_ptr_compress_32(0, obj, zcd.ptr1, zcd.n1, 3);
+   if (unlikely(zcd.ptr2 != NULL))
+   rte_ptr_compress_32(0, obj + zcd.n1,
+   zcd.ptr2, ret - zcd.n1, 3);
+   rte_ring_enqueue_zc_finish(r, ret);
+   return ret;
default:
printf("Invalid API type\n");
return 0;
@@ -162,6 +211,9 @@ static inline unsigned int
 test_ring_dequeue(struct rte_ring *r, void **obj, int esize, unsigned int n,
unsigned int api_type)
 {
+   unsigned int ret;
+   struct rte_ring_zc_data zcd = {0};
+
/* Legacy queue APIs? */
if (esize == -1)

[PATCH v5 4/4] test: add unit test for ptr compression

2023-11-01 Thread Paul Szczepanek
Test compresses and decompresses pointers with various combinations
of memory regions and alignments and verifies the pointers are
recovered correctly.

Signed-off-by: Paul Szczepanek 
---
 app/test/meson.build |   1 +
 app/test/test_eal_ptr_compress.c | 108 +++
 2 files changed, 109 insertions(+)
 create mode 100644 app/test/test_eal_ptr_compress.c

diff --git a/app/test/meson.build b/app/test/meson.build
index 4183d66b0e..3e172b154d 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -66,6 +66,7 @@ source_file_deps = {
 'test_dmadev_api.c': ['dmadev'],
 'test_eal_flags.c': [],
 'test_eal_fs.c': [],
+'test_eal_ptr_compress.c': [],
 'test_efd.c': ['efd', 'net'],
 'test_efd_perf.c': ['efd', 'hash'],
 'test_errno.c': [],
diff --git a/app/test/test_eal_ptr_compress.c b/app/test/test_eal_ptr_compress.c
new file mode 100644
index 00..c1c9a98be7
--- /dev/null
+++ b/app/test/test_eal_ptr_compress.c
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include "test.h"
+#include 
+#include 
+
+#include 
+
+#define MAX_ALIGN_EXPONENT 3
+#define PTRS_SIZE 16
+#define NUM_BASES 2
+#define NUM_REGIONS 4
+#define MAX_32BIT_REGION ((uint64_t)UINT32_MAX + 1)
+#define MAX_16BIT_REGION (UINT16_MAX + 1)
+
+static int
+test_eal_ptr_compress_params(
+   void *base,
+   uint64_t mem_sz,
+   unsigned int align_exp,
+   unsigned int num_ptrs,
+   bool use_32_bit)
+{
+   unsigned int i;
+   unsigned int align = 1 << align_exp;
+   void *ptrs[PTRS_SIZE] = {0};
+   void *ptrs_out[PTRS_SIZE] = {0};
+   uint32_t offsets32[PTRS_SIZE] = {0};
+   uint16_t offsets16[PTRS_SIZE] = {0};
+
+   for (i = 0; i < num_ptrs; i++) {
+   /* make pointers point at memory in steps of align */
+   /* alternate steps from the start and end of memory region */
+   if ((i & 1) == 1)
+   ptrs[i] = (char *)base + mem_sz - i * align;
+   else
+   ptrs[i] = (char *)base + i * align;
+   }
+
+   if (use_32_bit) {
+   rte_ptr_compress_32(base, ptrs, offsets32, num_ptrs, align_exp);
+   rte_ptr_decompress_32(base, offsets32, ptrs_out, num_ptrs,
+   align_exp);
+   } else {
+   rte_ptr_compress_16(base, ptrs, offsets16, num_ptrs, align_exp);
+   rte_ptr_decompress_16(base, offsets16, ptrs_out, num_ptrs,
+   align_exp);
+   }
+
+   TEST_ASSERT_BUFFERS_ARE_EQUAL(ptrs, ptrs_out, sizeof(void *) * num_ptrs,
+   "Decompressed pointers corrupted\nbase pointer: %p, "
+   "memory region size: %" PRIu64 ", alignment exponent: %u, "
+   "num of pointers: %u, using %s offsets",
+   base, mem_sz, align_exp, num_ptrs,
+   use_32_bit ? "32-bit" : "16-bit");
+
+   return 0;
+}
+
+static int
+test_eal_ptr_compress(void)
+{
+   unsigned int j, k, n;
+   int ret = 0;
+   void * const bases[NUM_BASES] = { (void *)0, (void *)UINT16_MAX };
+   /* maximum size for pointers aligned by consecutive powers of 2 */
+   const uint64_t region_sizes_16[NUM_REGIONS] = {
+   MAX_16BIT_REGION,
+   MAX_16BIT_REGION * 2,
+   MAX_16BIT_REGION * 4,
+   MAX_16BIT_REGION * 8,
+   };
+   const uint64_t region_sizes_32[NUM_REGIONS] = {
+   MAX_32BIT_REGION,
+   MAX_32BIT_REGION * 2,
+   MAX_32BIT_REGION * 4,
+   MAX_32BIT_REGION * 8,
+   };
+
+   for (j = 0; j < NUM_REGIONS; j++) {
+   for (k = 0; k < NUM_BASES; k++) {
+   for (n = 1; n < PTRS_SIZE; n++) {
+   ret |= test_eal_ptr_compress_params(
+   bases[k],
+   region_sizes_16[j],
+   j /* exponent of alignment */,
+   n,
+   false
+   );
+   ret |= test_eal_ptr_compress_params(
+   bases[k],
+   region_sizes_32[j],
+   j /* exponent of alignment */,
+   n,
+   true
+   );
+   if (ret != 0)
+   return ret;
+   }
+   }
+   }
+
+   return ret;
+}
+
+REGISTER_FAST_TEST(eal_ptr_compress_autotest, true, true, 
test_eal_ptr_compress);
--
2.25.1



Re: [PATCH] eal: stop iteration after lcore info is processed

2023-11-01 Thread Stephen Hemminger
On Wed,  1 Nov 2023 15:20:53 +0800
Ruifeng Wang  wrote:

> From: Ruifeng Wang 
> To: Kevin Laatz , Robin Jarry ,  
> Morten Brørup 
> Cc: dev@dpdk.org, honnappa.nagaraha...@arm.com, n...@arm.com,  Ruifeng Wang 
> , sta...@dpdk.org
> Subject: [PATCH] eal: stop iteration after lcore info is processed
> Date: Wed,  1 Nov 2023 15:20:53 +0800
> X-Mailer: git-send-email 2.25.1
> 
> Telemetry iterates on lcore ID to collect info of a specific lcore.
> Since only one lcore is processed at a time, the iteration can stop
> when a matching lcore is found.
> 
> Fixes: f2b852d909f9 ("eal: add lcore info in telemetry")
> Cc: rja...@redhat.com
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Ruifeng Wang 

Looks like a good optimization. Not sure it needs to go to stable.

Acked-by: Stephen Hemminger 


Re: [PATCH v2 00/11] Clean up NFP PMD

2023-11-01 Thread Ferruh Yigit
On 10/28/2023 7:53 AM, Chaoyong He wrote:
> This patch series clean up the NFP PMD, by:
> - Using the DPDK macro and API to replace the user defined ones.
> - Remove the unneeded macro and logic.
> - Remove the duplicated logic.
> 
> ---
> v2:
> * Fix the compile error.
> * Fix one check script warning.
> ---
> 
> Chaoyong He (11):
>   net/nfp: use the suitable helper macro
>   net/nfp: remove the unneeded call of underlying API
>   net/nfp: remove the unneeded check of process type
>   net/nfp: remove the unneeded data abstraction
>   net/nfp: remove the redundancy macro
>   net/nfp: remove redundancy logic of init control BAR
>   net/nfp: use the DPDK defined function
>   net/nfp: replace hard coded value
>   net/nfp: unify the PMD name with macro
>   net/nfp: extract a helper function
>   net/nfp: remove the redundancy logic of representor port
> 


'struct nfp_net_adapter' removed in 4/11.

Series applied to dpdk-next-net/main, thanks.



RE: [PATCH v3] event/dlb2: fix disable PASID for kernel 6.2

2023-11-01 Thread Sevincer, Abdullah

>++ PCIe maintainers.

>+I will leave this up to @David Marchand  / @Thomas as this patch has common 
>code changes and needs to come via main tree.

>+Also in this case, The comment was given very early(Back in June 7) for the 
>same.
>+https://patches.dpdk.org/project/dpdk/patch/20230607210050.107944-1-abdullah.sevin...@intel.com/

Thanks Jerrin and Bruce for the comments.
I will wait for opinion of PCI maintainers.


Re: [PATCH] net/enic: avoid extra unlock when setting MTU in enic

2023-11-01 Thread Ferruh Yigit
On 11/1/2023 7:28 AM, Weiguo Li wrote:
> The 'set_mtu_done' goto statement is being executed in a context
> where the 'mtu_lock' has not been previously locked.
> 
> To avoid the extra unlocking operation, replace the goto statement
> with a return statement.
> 
> Fixes: c3e09182bcd6 ("net/enic: support scatter Rx in MTU update")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: Weiguo Li 
>

<...>

>
> diff --git a/.mailmap b/.mailmap
> index 3f5bab26a8..b4f0ae26b8 100644
> --- a/.mailmap
> +++ b/.mailmap
> @@ -1500,7 +1500,7 @@ Waterman Cao 
>  Weichun Chen 
>  Wei Dai 
>  Weifeng Li 
> -Weiguo Li 
> +Weiguo Li  
>

As this patch signed-off with new email address, I assume intention is
to make it default address, so I will update accordingly while merging.


Reviewed-by: Ferruh Yigit 

Applied to dpdk-next-net/main, thanks.


Re: [PATCH 0/6] net/hns3: add some bugfix for hns3

2023-11-01 Thread Ferruh Yigit
On 10/31/2023 12:23 PM, Jie Hai wrote:
> This patchset contains some bugfix for hns3 pmd.
> 
> Huisong Li (2):
>   net/hns3: fix setting DCB capability
>   net/hns3: fix LRO offload to report
> 
> Jie Hai (4):
>   net/hns3: fix return value
>   net/hns3: fix some error log
>   net/hns3: do not export API for setting and getting algo and key
>   net/hns3: fix uninitialized value
> 

Series applied to dpdk-next-net/main, thanks.


[PATCH] eal: add missing extension to statement expression

2023-11-01 Thread Tyler Retzlaff
add missing __extension__ keyword to RTE_ALIGN_MUL_NEAR statement
expression to be consistent with other macros using statement
expressions

Signed-off-by: Tyler Retzlaff 
---
 lib/eal/include/rte_common.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/eal/include/rte_common.h b/lib/eal/include/rte_common.h
index 484f81e..c1ba32d 100644
--- a/lib/eal/include/rte_common.h
+++ b/lib/eal/include/rte_common.h
@@ -467,7 +467,7 @@ static void __attribute__((destructor(RTE_PRIO(prio)), 
used)) func(void)
  * whichever difference is the lowest.
  */
 #define RTE_ALIGN_MUL_NEAR(v, mul) \
-   ({  \
+   __extension__ ({\
typeof(v) ceil = RTE_ALIGN_MUL_CEIL(v, mul);\
typeof(v) floor = RTE_ALIGN_MUL_FLOOR(v, mul);  \
(ceil - (v)) > ((v) - floor) ? floor : ceil;\
-- 
1.8.3.1



[PATCH] eal: provide trace point register macro for MSVC

2023-11-01 Thread Tyler Retzlaff
Provide an alternate RTE_TRACE_POINT_REGISTER macro when building with
MSVC that allocates segments for the trace point using MSVC specific
features

Signed-off-by: Tyler Retzlaff 
---
 lib/eal/include/rte_trace_point_register.h | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/lib/eal/include/rte_trace_point_register.h 
b/lib/eal/include/rte_trace_point_register.h
index a9682d3..e6c2abe 100644
--- a/lib/eal/include/rte_trace_point_register.h
+++ b/lib/eal/include/rte_trace_point_register.h
@@ -18,6 +18,19 @@
 
 RTE_DECLARE_PER_LCORE(volatile int, trace_point_sz);
 
+#ifdef RTE_TOOLCHAIN_MSVC
+#define RTE_TRACE_POINT_REGISTER(trace, name) \
+rte_trace_point_t \
+__pragma(data_seg("__rte_trace_point")) \
+__declspec(allocate("__rte_trace_point")) \
+__##trace; \
+static const char __##trace##_name[] = RTE_STR(name); \
+RTE_INIT(trace##_init) \
+{ \
+   __rte_trace_point_register(&__##trace, __##trace##_name, \
+   (void (*)(void)) trace); \
+}
+#else
 #define RTE_TRACE_POINT_REGISTER(trace, name) \
 rte_trace_point_t __attribute__((section("__rte_trace_point"))) __##trace; \
 static const char __##trace##_name[] = RTE_STR(name); \
@@ -26,6 +39,7 @@
__rte_trace_point_register(&__##trace, __##trace##_name, \
(void (*)(void)) trace); \
 }
+#endif
 
 #define __rte_trace_point_emit_header_generic(t) \
RTE_PER_LCORE(trace_point_sz) = __RTE_TRACE_EVENT_HEADER_SZ
-- 
1.8.3.1



Re: [PATCH v3 0/2] ethdev: add the check for PTP capability

2023-11-01 Thread Ferruh Yigit
On 10/20/2023 4:58 AM, lihuisong (C) wrote:
> 
> 在 2023/9/21 19:17, Hemant Agrawal 写道:
>> HI Ferruh,
>>
>>> On 9/21/2023 11:02 AM, lihuisong (C) wrote:
 Hi Ferruh,

 Sorry for my delay reply because of taking a look at all PMDs
 implementation.


 在 2023/9/16 1:46, Ferruh Yigit 写道:
> On 8/17/2023 9:42 AM, Huisong Li wrote:
>>   From the first version of ptpclient, it seems that this example
>> assume that the PMDs support the PTP feature and enable PTP by
>> default. Please see commit ab129e9065a5 ("examples/ptpclient: add
>> minimal PTP client") which are introduced in 2015.
>>
>> And two years later, Rx HW timestamp offload was introduced to
>> enable or disable PTP feature in HW via rte_eth_rxmode. Please see
>> commit 42ffc45aa340 ("ethdev: add Rx HW timestamp capability").
>>
> Hi Huisong,
>
> As far as I know this offload is not for PTP.
> PTP and TIMESTAMP are different.
 If TIMESTAMP offload cannot stand for PTP, we may need to add one new
 offlaod for PTP.

>>> Can you please detail what is "PTP offload"?
>>>
> PTP is a protocol for time sync.
> Rx TIMESTAMP offload is to ask HW to add timestamp to mbuf.
 Yes.
 But a lot of PMDs actually depand on HW to report Rx timestamp
 releated information because of reading Rx timestamp of PTP SYNC
 packet in read_rx_timestamp API.

>>> HW support may be required for PTP but this doesn't mean timestamp
>>> offload is used.
>> And then about four years later, ptpclient enable Rx timestamp
>> offload because some PMDs require this offload to enable. Please see
>> commit 7a04a4f67dca ("examples/ptpclient: enable Rx timestamp
>>> offload").
> dpaa2 seems using TIMESTAMP offload and PTP together, hence they
> updated ptpclient sample to set TIMESTAMP offload.
>> [Hemant] In case of dpaa2, we need to enable HW timestamp for PTP. In
>> the current dpaa2 driver
>> If the code is compiled with, RTE_LIBRTE_IEEE1588, we are enabling the
>> HW timestamp
>> Otherwise, we are only enabling it when the TIMESTAMP offload is
>> selected.
>>
>> We added patch in ptpclient earlier to pass the timestamp offload,
>> however later we also updated the driver to do it by default.
>>
>>
> It is a little mess for PTP and RTE_LIBRTE_IEEE1588 to use.
> Actually, whether PTP code is compiled should not depended on this macro
> RTE_LIBRTE_IEEE1588.
>

There is already a patch by Thomas to remove RTE_LIBRTE_IEEE1588 [1],
agree that this functionality needs some attention.

Removing RTE_LIBRTE_IEEE1588 impact drivers, that is what holding us back.


[1]
https://patchwork.dpdk.org/project/dpdk/patch/20230203132810.14187-1-tho...@monjalon.net/

> If there is a capability, it will be perfect, no matter whether it is
> TIMESTAMP offload.
> What do you think, Ferruh?
>

Difficulty is to know when to enable HW timestamp, and for some drivers
this may change the descriptor format (to include timestamp), so driver
should set correct datapath functions for this case.

We know when a HW timer is required, it is required for PTP protocol and
required for TIMESTAMP offload.

What do you think to dynamically enable it for PTP when
'rte_eth_timesync_enable()' API called, and for TIMESTAMP offload when
the offload is enabled.
If this works, now new configuration item or offload is required, what
do you think?


 There are many PMDs doing like this, such as ice, igc, cnxk, dpaa2,
 hns3 and so on.

>>> Can you please point the ice & igc code, cc'ing their maintainers, we
>>> can look
>>> together?
>>>
>>>
> We need to clarify dpaa2 usage.
>
>> By all the records, this is more like a process of perfecting PTP
>> feature.
>> Not all network adaptors support PTP feature. So adding the check
>> for PTP capability in ethdev layer is necessary.
>>
> Nope, as PTP (IEEE1588/802.1AS) implemented as dev_ops, and ops
> already checked, so no additional check is needed.
 But only having dev_ops about PTP doesn't satisfy the use of this
 feature.
 For example,
 there are serveal network ports belonged to a driver on one OS, and
 only one port support PTP function.
 So driver needs one *PTP* offload.
> We just need to clarify TIMESTAMP offload and PTP usage and find out
> what is causing confusion.
 Yes it is a little bit confusion.
 There are two kinds of implementation:
 A: ixgbe and txgbe (it seems that their HW is similar) don't need
 TIMESTAMP offload,and only use dev_ops to finish PTP feature.
 B:  saving "Rx timestamp related information" from Rx description when
 receive PTP SYNC packet and
  report it in read_rx_timestamp API.
 For case B, most of driver use TIMESTAMP offload to decide if driver
 save "Rx timestamp related information.
 What do you think about this, Ferruh?
> I would be great if you can help on clarification, and update
> d

Re: [PATCH v3 0/2] ethdev: add the check for PTP capability

2023-11-01 Thread Ferruh Yigit
On 9/21/2023 12:17 PM, Hemant Agrawal wrote:
> HI Ferruh,
> 
>> On 9/21/2023 11:02 AM, lihuisong (C) wrote:
>>> Hi Ferruh,
>>>
>>> Sorry for my delay reply because of taking a look at all PMDs
>>> implementation.
>>>
>>>
>>> 在 2023/9/16 1:46, Ferruh Yigit 写道:
 On 8/17/2023 9:42 AM, Huisong Li wrote:
>  From the first version of ptpclient, it seems that this example
> assume that the PMDs support the PTP feature and enable PTP by
> default. Please see commit ab129e9065a5 ("examples/ptpclient: add
> minimal PTP client") which are introduced in 2015.
>
> And two years later, Rx HW timestamp offload was introduced to
> enable or disable PTP feature in HW via rte_eth_rxmode. Please see
> commit 42ffc45aa340 ("ethdev: add Rx HW timestamp capability").
>
 Hi Huisong,

 As far as I know this offload is not for PTP.
 PTP and TIMESTAMP are different.
>>> If TIMESTAMP offload cannot stand for PTP, we may need to add one new
>>> offlaod for PTP.
>>>
>>
>> Can you please detail what is "PTP offload"?
>>

 PTP is a protocol for time sync.
 Rx TIMESTAMP offload is to ask HW to add timestamp to mbuf.
>>> Yes.
>>> But a lot of PMDs actually depand on HW to report Rx timestamp
>>> releated information because of reading Rx timestamp of PTP SYNC
>>> packet in read_rx_timestamp API.
>>>
>>
>> HW support may be required for PTP but this doesn't mean timestamp
>> offload is used.
> 
>>

> And then about four years later, ptpclient enable Rx timestamp
> offload because some PMDs require this offload to enable. Please see
> commit 7a04a4f67dca ("examples/ptpclient: enable Rx timestamp
>> offload").
>
 dpaa2 seems using TIMESTAMP offload and PTP together, hence they
 updated ptpclient sample to set TIMESTAMP offload.
> 
> [Hemant] In case of dpaa2, we need to enable HW timestamp for PTP. In the 
> current dpaa2 driver
> If the code is compiled with, RTE_LIBRTE_IEEE1588, we are enabling the HW 
> timestamp
> Otherwise, we are only enabling it when the TIMESTAMP offload is selected.  
> 

I think this is reasonable, HW timestamp enabled only when required.


> We added patch in ptpclient earlier to pass the timestamp offload, however 
> later we also updated the driver to do it by default. 
> 

This part I am not sure,
so application request TIMESTAMP offload enable HW timestamp to use it
for PTP.

There are already 'rte_eth_timesync_enable()' and
'rte_eth_timesync_disable()' functions, and ptpclient sample already
uses them, why now utilize these APIs to enable HW timestamp, or other
related configuration?


> 
>>> There are many PMDs doing like this, such as ice, igc, cnxk, dpaa2,
>>> hns3 and so on.
>>>
>>
>> Can you please point the ice & igc code, cc'ing their maintainers, we can 
>> look
>> together?
>>
>>

 We need to clarify dpaa2 usage.

> By all the records, this is more like a process of perfecting PTP
> feature.
> Not all network adaptors support PTP feature. So adding the check
> for PTP capability in ethdev layer is necessary.
>
 Nope, as PTP (IEEE1588/802.1AS) implemented as dev_ops, and ops
 already checked, so no additional check is needed.
>>> But only having dev_ops about PTP doesn't satisfy the use of this feature.
>>> For example,
>>> there are serveal network ports belonged to a driver on one OS, and
>>> only one port support PTP function.
>>> So driver needs one *PTP* offload.

 We just need to clarify TIMESTAMP offload and PTP usage and find out
 what is causing confusion.
>>> Yes it is a little bit confusion.
>>> There are two kinds of implementation:
>>> A: ixgbe and txgbe (it seems that their HW is similar) don't need
>>> TIMESTAMP offload,and only use dev_ops to finish PTP feature.
>>> B:  saving "Rx timestamp related information" from Rx description when
>>> receive PTP SYNC packet and
>>>     report it in read_rx_timestamp API.
>>> For case B, most of driver use TIMESTAMP offload to decide if driver
>>> save "Rx timestamp related information.
>>> What do you think about this, Ferruh?
 I would be great if you can help on clarification, and update
 documentation or API comments, or what ever required, for this.
>>> ok

> ---
> v3:
>   - patch [2/3] for hns3 has been applied and so remove it.
>   - ops pointer check is closer to usage.
>
> Huisong Li (2):
>    examples/ptpclient: add the check for PTP capability
>    ethdev: add the check for the valitity of timestamp offload
>
>   examples/ptpclient/ptpclient.c |  5 +++
>   lib/ethdev/rte_ethdev.c    | 57
> +-
>   2 files changed, 61 insertions(+), 1 deletion(-)
>
 .
> 



Re: [PATCH v3 0/2] ethdev: add the check for PTP capability

2023-11-01 Thread Ferruh Yigit
timesync_read_rx_timestamp
On 9/21/2023 12:59 PM, lihuisong (C) wrote:
> add ice & igc maintainers
> 
> 在 2023/9/21 19:06, Ferruh Yigit 写道:
>> On 9/21/2023 11:02 AM, lihuisong (C) wrote:
>>> Hi Ferruh,
>>>
>>> Sorry for my delay reply because of taking a look at all PMDs
>>> implementation.
>>>
>>>
>>> 在 2023/9/16 1:46, Ferruh Yigit 写道:
 On 8/17/2023 9:42 AM, Huisong Li wrote:
>   From the first version of ptpclient, it seems that this example
> assume that
> the PMDs support the PTP feature and enable PTP by default. Please see
> commit ab129e9065a5 ("examples/ptpclient: add minimal PTP client")
> which are introduced in 2015.
>
> And two years later, Rx HW timestamp offload was introduced to
> enable or
> disable PTP feature in HW via rte_eth_rxmode. Please see
> commit 42ffc45aa340 ("ethdev: add Rx HW timestamp capability").
>
 Hi Huisong,

 As far as I know this offload is not for PTP.
 PTP and TIMESTAMP are different.
>>> If TIMESTAMP offload cannot stand for PTP, we may need to add one new
>>> offlaod for PTP.
>>>
>> Can you please detail what is "PTP offload"?
>>
> It indicates whether the device supports PTP or enable  PTP feature.
>

We have 'rte_eth_timesync_enable()' and 'rte_eth_timesync_disable()'
APIs to control PTP support.

But when mention from "offload", it is something device itself does.

PTP is a protocol (IEEE 1588), and used to synchronize clocks.
What I get is protocol can be parsed by networking stack and it can be
used by application to synchronize clock.

When you are refer to "PTP offload", does it mean device (NIC)
understands the protocol and parse it to synchronize device clock with
other devices?


We have 'rte_eth_timesync_*()' APIs, my understanding is application
parses the PTP protocol, and it may use this information to configure
NIC to synchronize its clock, but it may also use PTP provided
information to sync any other clock. Is this understanding correct?


> If TIMESTAMP offload is not for PTP, I don't know what the point of this
> offload independent existence is.
>

TIMESTAMP offload request device to add timestamp to mbuf in ingress,
and use mbuf timestamp to schedule packet for egress.

Technically this time-stamping can be done by driver, but if offload
set, HW timestamp is used for it.

Rx timestamp can be used for various reasons, like debugging and
performance/latency analyses, etc..


>>
 PTP is a protocol for time sync.
 Rx TIMESTAMP offload is to ask HW to add timestamp to mbuf.
>>> Yes.
>>> But a lot of PMDs actually depand on HW to report Rx timestamp releated
>>> information
>>> because of reading Rx timestamp of PTP SYNC packet in read_rx_timestamp
>>> API.
>>>
>> HW support may be required for PTP but this doesn't mean timestamp
>> offload is used.
> understand.
>>
> And then about four years later, ptpclient enable Rx timestamp offload
> because some PMDs require this offload to enable. Please see
> commit 7a04a4f67dca ("examples/ptpclient: enable Rx timestamp
> offload").
>
 dpaa2 seems using TIMESTAMP offload and PTP together, hence they
 updated
 ptpclient sample to set TIMESTAMP offload.
>>> There are many PMDs doing like this, such as ice, igc, cnxk, dpaa2, hns3
>>> and so on.
>>>
>> Can you please point the ice & igc code, cc'ing their maintainers, we
>> can look together?
> 
> *-->igc code:*
> 
> Having following codes in igc_recv_scattered_pkts():
> 
>     if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
>         uint32_t *ts = rte_pktmbuf_mtod_offset(first_seg,
>                 uint32_t *, -IGC_TS_HDR_LEN);
>         rxq->rx_timestamp = (uint64_t)ts[3] * NSEC_PER_SEC +
>                 ts[2];
>         rxm->timesync = rxq->queue_id;
>     }
> Note:this rxm->timesync will be used in timesync_read_rx_timestamp()
> 

Above code requires TIMESTAMP offload to set timesync, but this
shouldn't be a requirement. Usage seems mixed.

> *-->ice code:*
> 
> #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
>     if (ice_timestamp_dynflag > 0 &&
>         (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)) {
>         rxq->time_high =
>            rte_le_to_cpu_32(rxd.wb.flex_ts.ts_high);
>         if (unlikely(is_tsinit)) {
>             ts_ns = ice_tstamp_convert_32b_64b(hw, ad, 1,
> rxq->time_high);
>             rxq->hw_time_low = (uint32_t)ts_ns;
>             rxq->hw_time_high = (uint32_t)(ts_ns >> 32);
>             is_tsinit = false;
>         } else {
>             if (rxq->time_high < rxq->hw_time_low)
>                 rxq->hw_time_high += 1;
>             ts_ns = (uint64_t)rxq->hw_time_high << 32 | rxq->time_high;
>             rxq->hw_time_low = rxq->time_high;
>         }
>         rxq->hw_time_update = rte_get_timer_cycles() /
>                  (rte_get_timer_hz() / 1000);
>         *RTE_MBUF_DYNFIELD(rxm,
>                    (ice_timesta

[PATCH 1/5] table: use abstracted bit count functions

2023-11-01 Thread Tyler Retzlaff
Use rte_clz64 instead of __builtin_clzl
Use rte_ctz64 instead of __builtin_ctzl

Fixes: 18898c4d06f9 ("eal: use abstracted bit count functions")

Signed-off-by: Tyler Retzlaff 
---
 lib/table/rte_lru_arm64.h  | 2 +-
 lib/table/rte_swx_table_em.c   | 4 ++--
 lib/table/rte_table_hash_ext.c | 4 ++--
 lib/table/rte_table_hash_lru.c | 4 ++--
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/lib/table/rte_lru_arm64.h b/lib/table/rte_lru_arm64.h
index add889a..ddfd841 100644
--- a/lib/table/rte_lru_arm64.h
+++ b/lib/table/rte_lru_arm64.h
@@ -33,7 +33,7 @@
uint16x4_t min_vec = vmov_n_u16(vminv_u16(lru_vec));
uint64_t mask = vget_lane_u64(vreinterpret_u64_u16(
vceq_u16(min_vec, lru_vec)), 0);
-   return __builtin_clzl(mask) >> 4;
+   return rte_clz64(mask) >> 4;
 }
 #define lru_pos(bucket) f_lru_pos(bucket->lru_list)
 
diff --git a/lib/table/rte_swx_table_em.c b/lib/table/rte_swx_table_em.c
index 84837c8..8d67c05 100644
--- a/lib/table/rte_swx_table_em.c
+++ b/lib/table/rte_swx_table_em.c
@@ -260,8 +260,8 @@ struct table {
if (!params->hash_func)
t->params.hash_func = rte_hash_crc;
 
-   t->key_size_shl = __builtin_ctzl(key_size);
-   t->data_size_shl = __builtin_ctzl(key_data_size);
+   t->key_size_shl = rte_ctz64(key_size);
+   t->data_size_shl = rte_ctz64(key_data_size);
t->n_buckets = n_buckets;
t->n_buckets_ext = n_buckets_ext;
t->total_size = total_size;
diff --git a/lib/table/rte_table_hash_ext.c b/lib/table/rte_table_hash_ext.c
index 51a20ac..1cf0fc2 100644
--- a/lib/table/rte_table_hash_ext.c
+++ b/lib/table/rte_table_hash_ext.c
@@ -243,8 +243,8 @@ struct rte_table_hash {
 
/* Internal */
t->bucket_mask = t->n_buckets - 1;
-   t->key_size_shl = __builtin_ctzl(p->key_size);
-   t->data_size_shl = __builtin_ctzl(entry_size);
+   t->key_size_shl = rte_ctz64(p->key_size);
+   t->data_size_shl = rte_ctz64(entry_size);
 
/* Tables */
key_mask_offset = 0;
diff --git a/lib/table/rte_table_hash_lru.c b/lib/table/rte_table_hash_lru.c
index a4e1a05..5f28710 100644
--- a/lib/table/rte_table_hash_lru.c
+++ b/lib/table/rte_table_hash_lru.c
@@ -220,8 +220,8 @@ struct rte_table_hash {
 
/* Internal */
t->bucket_mask = t->n_buckets - 1;
-   t->key_size_shl = __builtin_ctzl(p->key_size);
-   t->data_size_shl = __builtin_ctzl(entry_size);
+   t->key_size_shl = rte_ctz64(p->key_size);
+   t->data_size_shl = rte_ctz64(entry_size);
 
/* Tables */
key_mask_offset = 0;
-- 
1.8.3.1



[PATCH 2/5] distributor: use abstracted bit count functions

2023-11-01 Thread Tyler Retzlaff
Use rte_ctz64 instead of __builtin_ctzl

Fixes: 18898c4d06f9 ("eal: use abstracted bit count functions")

Signed-off-by: Tyler Retzlaff 
---
 lib/distributor/rte_distributor_single.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/distributor/rte_distributor_single.c 
b/lib/distributor/rte_distributor_single.c
index ad43c13..08144e5 100644
--- a/lib/distributor/rte_distributor_single.c
+++ b/lib/distributor/rte_distributor_single.c
@@ -252,7 +252,7 @@ struct rte_mbuf *
 
if (match) {
next_mb = NULL;
-   unsigned worker = __builtin_ctzl(match);
+   unsigned worker = rte_ctz64(match);
if (add_to_backlog(&d->backlog[worker],
next_value) < 0)
next_idx--;
-- 
1.8.3.1



[PATCH 0/5] use abstracted bit count functions

2023-11-01 Thread Tyler Retzlaff
The first set of conversions missed the long 'l' versions of the
builtins that were being used. This series completes the conversion
of remaining libraries from __builtin_ctzl and __builtin_clzl.

Tyler Retzlaff (5):
  table: use abstracted bit count functions
  distributor: use abstracted bit count functions
  hash: use abstracted bit count functions
  member: use abstracted bit count functions
  rcu: use abstracted bit count functions

 lib/distributor/rte_distributor_single.c |  2 +-
 lib/hash/rte_cuckoo_hash.c   | 16 
 lib/member/rte_member_vbf.c  | 12 ++--
 lib/member/rte_member_x86.h  |  6 +++---
 lib/rcu/rte_rcu_qsbr.c   |  4 ++--
 lib/rcu/rte_rcu_qsbr.h   |  2 +-
 lib/table/rte_lru_arm64.h|  2 +-
 lib/table/rte_swx_table_em.c |  4 ++--
 lib/table/rte_table_hash_ext.c   |  4 ++--
 lib/table/rte_table_hash_lru.c   |  4 ++--
 10 files changed, 28 insertions(+), 28 deletions(-)

-- 
1.8.3.1



[PATCH 3/5] hash: use abstracted bit count functions

2023-11-01 Thread Tyler Retzlaff
Use rte_ctz64 instead of __builtin_ctzl

Fixes: 18898c4d06f9 ("eal: use abstracted bit count functions")

Signed-off-by: Tyler Retzlaff 
---
 lib/hash/rte_cuckoo_hash.c | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/lib/hash/rte_cuckoo_hash.c b/lib/hash/rte_cuckoo_hash.c
index b2cf60d..d8d4cc1 100644
--- a/lib/hash/rte_cuckoo_hash.c
+++ b/lib/hash/rte_cuckoo_hash.c
@@ -1931,7 +1931,7 @@ struct rte_hash *
 
if (prim_hitmask[i]) {
uint32_t first_hit =
-   __builtin_ctzl(prim_hitmask[i])
+   rte_ctz64(prim_hitmask[i])
>> 1;
uint32_t key_idx =
primary_bkt[i]->key_idx[first_hit];
@@ -1945,7 +1945,7 @@ struct rte_hash *
 
if (sec_hitmask[i]) {
uint32_t first_hit =
-   __builtin_ctzl(sec_hitmask[i])
+   rte_ctz64(sec_hitmask[i])
>> 1;
uint32_t key_idx =
secondary_bkt[i]->key_idx[first_hit];
@@ -1962,7 +1962,7 @@ struct rte_hash *
positions[i] = -ENOENT;
while (prim_hitmask[i]) {
uint32_t hit_index =
-   __builtin_ctzl(prim_hitmask[i])
+   rte_ctz64(prim_hitmask[i])
>> 1;
uint32_t key_idx =
primary_bkt[i]->key_idx[hit_index];
@@ -1990,7 +1990,7 @@ struct rte_hash *
 
while (sec_hitmask[i]) {
uint32_t hit_index =
-   __builtin_ctzl(sec_hitmask[i])
+   rte_ctz64(sec_hitmask[i])
>> 1;
uint32_t key_idx =
secondary_bkt[i]->key_idx[hit_index];
@@ -2088,7 +2088,7 @@ struct rte_hash *
 
if (prim_hitmask[i]) {
uint32_t first_hit =
-   __builtin_ctzl(prim_hitmask[i])
+   rte_ctz64(prim_hitmask[i])
>> 1;
uint32_t key_idx =
primary_bkt[i]->key_idx[first_hit];
@@ -2102,7 +2102,7 @@ struct rte_hash *
 
if (sec_hitmask[i]) {
uint32_t first_hit =
-   __builtin_ctzl(sec_hitmask[i])
+   rte_ctz64(sec_hitmask[i])
>> 1;
uint32_t key_idx =
secondary_bkt[i]->key_idx[first_hit];
@@ -2118,7 +2118,7 @@ struct rte_hash *
for (i = 0; i < num_keys; i++) {
while (prim_hitmask[i]) {
uint32_t hit_index =
-   __builtin_ctzl(prim_hitmask[i])
+   rte_ctz64(prim_hitmask[i])
>> 1;
uint32_t key_idx =
rte_atomic_load_explicit(
@@ -2150,7 +2150,7 @@ struct rte_hash *
 
while (sec_hitmask[i]) {
uint32_t hit_index =
-   __builtin_ctzl(sec_hitmask[i])
+   rte_ctz64(sec_hitmask[i])
>> 1;
uint32_t key_idx =
rte_atomic_load_explicit(
-- 
1.8.3.1



[PATCH 5/5] rcu: use abstracted bit count functions

2023-11-01 Thread Tyler Retzlaff
Use rte_ctz64 instead of __builtin_ctzl

Fixes: 18898c4d06f9 ("eal: use abstracted bit count functions")

Signed-off-by: Tyler Retzlaff 
---
 lib/rcu/rte_rcu_qsbr.c | 4 ++--
 lib/rcu/rte_rcu_qsbr.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/rcu/rte_rcu_qsbr.c b/lib/rcu/rte_rcu_qsbr.c
index 4dc7714..a9f3d6c 100644
--- a/lib/rcu/rte_rcu_qsbr.c
+++ b/lib/rcu/rte_rcu_qsbr.c
@@ -231,7 +231,7 @@
rte_memory_order_acquire);
id = i << __RTE_QSBR_THRID_INDEX_SHIFT;
while (bmap) {
-   t = __builtin_ctzl(bmap);
+   t = rte_ctz64(bmap);
fprintf(f, "%u ", id + t);
 
bmap &= ~(1UL << t);
@@ -252,7 +252,7 @@
rte_memory_order_acquire);
id = i << __RTE_QSBR_THRID_INDEX_SHIFT;
while (bmap) {
-   t = __builtin_ctzl(bmap);
+   t = rte_ctz64(bmap);
fprintf(f, "thread ID = %u, count = %" PRIu64 ", lock 
count = %u\n",
id + t,
rte_atomic_load_explicit(
diff --git a/lib/rcu/rte_rcu_qsbr.h b/lib/rcu/rte_rcu_qsbr.h
index 9f4aed2..13461f8 100644
--- a/lib/rcu/rte_rcu_qsbr.h
+++ b/lib/rcu/rte_rcu_qsbr.h
@@ -530,7 +530,7 @@ struct rte_rcu_qsbr_dq_parameters {
id = i << __RTE_QSBR_THRID_INDEX_SHIFT;
 
while (bmap) {
-   j = __builtin_ctzl(bmap);
+   j = rte_ctz64(bmap);
__RTE_RCU_DP_LOG(DEBUG,
"%s: check: token = %" PRIu64 ", wait = %d, Bit 
Map = 0x%" PRIx64 ", Thread ID = %d",
__func__, t, wait, bmap, id + j);
-- 
1.8.3.1



[PATCH 4/5] member: use abstracted bit count functions

2023-11-01 Thread Tyler Retzlaff
Use rte_ctz64 instead of __builtin_ctzl

Fixes: 18898c4d06f9 ("eal: use abstracted bit count functions")

Signed-off-by: Tyler Retzlaff 
---
 lib/member/rte_member_vbf.c | 12 ++--
 lib/member/rte_member_x86.h |  6 +++---
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/lib/member/rte_member_vbf.c b/lib/member/rte_member_vbf.c
index 9df4620..6440e35 100644
--- a/lib/member/rte_member_vbf.c
+++ b/lib/member/rte_member_vbf.c
@@ -108,8 +108,8 @@
 * div_shift is used for division shift, to be divided by number of bits
 * represented by a uint32_t variable
 */
-   ss->mul_shift = __builtin_ctzl(ss->num_set);
-   ss->div_shift = __builtin_ctzl(32 >> ss->mul_shift);
+   ss->mul_shift = rte_ctz64(ss->num_set);
+   ss->div_shift = rte_ctz64(32 >> ss->mul_shift);
 
RTE_MEMBER_LOG(DEBUG, "vector bloom filter created, "
"each bloom filter expects %u keys, needs %u bits, %u hashes, "
@@ -174,7 +174,7 @@
}
 
if (mask) {
-   *set_id = __builtin_ctzl(mask) + 1;
+   *set_id = rte_ctz64(mask) + 1;
return 1;
}
 
@@ -207,7 +207,7 @@
}
for (i = 0; i < num_keys; i++) {
if (mask[i]) {
-   set_ids[i] = __builtin_ctzl(mask[i]) + 1;
+   set_ids[i] = rte_ctz64(mask[i]) + 1;
num_matches++;
} else
set_ids[i] = RTE_MEMBER_NO_MATCH;
@@ -233,7 +233,7 @@
mask &= test_bit(bit_loc, ss);
}
while (mask) {
-   uint32_t loc = __builtin_ctzl(mask);
+   uint32_t loc = rte_ctz64(mask);
set_id[num_matches] = loc + 1;
num_matches++;
if (num_matches >= match_per_key)
@@ -272,7 +272,7 @@
for (i = 0; i < num_keys; i++) {
match_cnt_t = 0;
while (mask[i]) {
-   uint32_t loc = __builtin_ctzl(mask[i]);
+   uint32_t loc = rte_ctz64(mask[i]);
set_ids[i * match_per_key + match_cnt_t] = loc + 1;
match_cnt_t++;
if (match_cnt_t >= match_per_key)
diff --git a/lib/member/rte_member_x86.h b/lib/member/rte_member_x86.h
index 74c8e38..ee830f5 100644
--- a/lib/member/rte_member_x86.h
+++ b/lib/member/rte_member_x86.h
@@ -22,7 +22,7 @@
_mm256_load_si256((__m256i const *)buckets[bucket_id].sigs),
_mm256_set1_epi16(tmp_sig)));
if (hitmask) {
-   uint32_t hit_idx = __builtin_ctzl(hitmask) >> 1;
+   uint32_t hit_idx = rte_ctz64(hitmask) >> 1;
buckets[bucket_id].sets[hit_idx] = set_id;
return 1;
}
@@ -38,7 +38,7 @@
_mm256_load_si256((__m256i const *)buckets[bucket_id].sigs),
_mm256_set1_epi16(tmp_sig)));
while (hitmask) {
-   uint32_t hit_idx = __builtin_ctzl(hitmask) >> 1;
+   uint32_t hit_idx = rte_ctz64(hitmask) >> 1;
if (buckets[bucket_id].sets[hit_idx] != RTE_MEMBER_NO_MATCH) {
*set_id = buckets[bucket_id].sets[hit_idx];
return 1;
@@ -59,7 +59,7 @@
_mm256_load_si256((__m256i const *)buckets[bucket_id].sigs),
_mm256_set1_epi16(tmp_sig)));
while (hitmask) {
-   uint32_t hit_idx = __builtin_ctzl(hitmask) >> 1;
+   uint32_t hit_idx = rte_ctz64(hitmask) >> 1;
if (buckets[bucket_id].sets[hit_idx] != RTE_MEMBER_NO_MATCH) {
set_id[*counter] = buckets[bucket_id].sets[hit_idx];
(*counter)++;
-- 
1.8.3.1



Re: [PATCH v3 1/3] net/tap: fix L4 checksum offloading

2023-11-01 Thread Ferruh Yigit
On 8/24/2023 8:18 AM, David Marchand wrote:
> The L4 checksum offloading API does not require l4_len to be set.
> Make the driver discover the L4 headers size by itself.
> 
> Fixes: 6546e76056e3 ("net/tap: calculate checksums of multi segs packets")
> Cc: sta...@dpdk.org
> 
> Signed-off-by: David Marchand 
> Tested-by: Ales Musil 
>

For series,
Acked-by: Ferruh Yigit 

Series applied to dpdk-next-net/main, thanks.


RE: [PATCH v2 04/11] net/nfp: remove the unneeded data abstraction

2023-11-01 Thread Chaoyong He
> On 10/28/2023 7:53 AM, Chaoyong He wrote:
> > The data structure 'struct nfp_net_adapter' has only one data field
> > and we won't extend it in the future, which makes this abstraction
> > unneeded, so remove this data structure and the related macro
> > 'NFP_NET_DEV_PRIVATE_TO_HW'.
> >
> 
> Mentioned abstract struct, 'struct nfp_net_adapter', is not removed in this
> patch, although mentioned macro removed.

Oh, It should be missed when I do rebase, sorry about it.

> 
> Since there is not user of the struct after this patch, I guess intention was 
> to
> remove the struct, so if there is no other issue I can remove the struct while
> merging.

It's nice, thank you very much!

> 
> 
> > Signed-off-by: Chaoyong He 
> > Reviewed-by: Peng Zhang 



Re: [PATCH] net/enic: avoid extra unlock when setting MTU in enic

2023-11-01 Thread John Daley (johndale)
Reviewed-by: John Daley 

Thanks,
John

From: Weiguo Li 
Date: Wednesday, November 1, 2023 at 12:28 AM
To: John Daley (johndale) 
Cc: dev@dpdk.org , sta...@dpdk.org , Weiguo Li 

Subject: [PATCH] net/enic: avoid extra unlock when setting MTU in enic
The 'set_mtu_done' goto statement is being executed in a context
where the 'mtu_lock' has not been previously locked.

To avoid the extra unlocking operation, replace the goto statement
with a return statement.

Fixes: c3e09182bcd6 ("net/enic: support scatter Rx in MTU update")
Cc: sta...@dpdk.org

Signed-off-by: Weiguo Li 
---
 .mailmap | 2 +-
 drivers/net/enic/enic_main.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.mailmap b/.mailmap
index 3f5bab26a8..b4f0ae26b8 100644
--- a/.mailmap
+++ b/.mailmap
@@ -1500,7 +1500,7 @@ Waterman Cao 
 Weichun Chen 
 Wei Dai 
 Weifeng Li 
-Weiguo Li 
+Weiguo Li  
 Wei Huang 
 Wei Hu 
 Wei Hu (Xavier) 
diff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c
index 19a99a82c5..a6aaa760ca 100644
--- a/drivers/net/enic/enic_main.c
+++ b/drivers/net/enic/enic_main.c
@@ -1639,7 +1639,7 @@ int enic_set_mtu(struct enic *enic, uint16_t new_mtu)
  * packet length.
  */
 if (!eth_dev->data->dev_started)
-   goto set_mtu_done;
+   return rc;

 /*
  * The device has started, re-do RQs on the fly. In the process, we
--
2.34.1


[PATCH v2] net/ice: fix DCF port statistics not cleared

2023-11-01 Thread Zhichao Zeng
Call 'ice_dcf_stats_reset' during the initialization of the DCF port in
order to clear any statistics that may exist from the last use of the DCF
and to avoid statistics errors.

Fixes: 7564d5509611 ("net/ice: add DCF hardware initialization")
Cc: sta...@dpdk.org

Signed-off-by: Zhichao Zeng 

---
v2: modify git log
---
 drivers/net/ice/ice_dcf_ethdev.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ice/ice_dcf_ethdev.c b/drivers/net/ice/ice_dcf_ethdev.c
index 065ec728c2..29699c2c32 100644
--- a/drivers/net/ice/ice_dcf_ethdev.c
+++ b/drivers/net/ice/ice_dcf_ethdev.c
@@ -1937,6 +1937,8 @@ ice_dcf_dev_init(struct rte_eth_dev *eth_dev)
return -1;
}
 
+   ice_dcf_stats_reset(eth_dev);
+
dcf_config_promisc(adapter, false, false);
return 0;
 }
-- 
2.34.1



[PATCH] net/ice: fix Tx Prepareation

2023-11-01 Thread Qi Zhang
1. Check nb_segs > 8 for NO TSO case
2. Check nb_segs > Tx ring size for TSO case
3. report nb_mtu_seg_max and nb_seg_max in dev_info.

Fixes: 17c7d0f9d6a4 ("net/ice: support basic Rx/Tx")
Cc: sta...@dpdk.org

Signed-off-by: Qi Zhang 
---
 drivers/net/ice/ice_ethdev.c |  2 ++
 drivers/net/ice/ice_rxtx.c   | 16 +++-
 drivers/net/ice/ice_rxtx.h   |  2 ++
 3 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c
index 6ef06b9926..3ccba4db80 100644
--- a/drivers/net/ice/ice_ethdev.c
+++ b/drivers/net/ice/ice_ethdev.c
@@ -3918,6 +3918,8 @@ ice_dev_info_get(struct rte_eth_dev *dev, struct 
rte_eth_dev_info *dev_info)
.nb_max = ICE_MAX_RING_DESC,
.nb_min = ICE_MIN_RING_DESC,
.nb_align = ICE_ALIGN_RING_DESC,
+   .nb_mtu_seg_max = ICE_TX_MTU_SEG_MAX,
+   .nb_seg_max = ICE_MAX_RING_DESC,
};
 
dev_info->speed_capa = RTE_ETH_LINK_SPEED_10M |
diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c
index ee9cb7b955..868ee59933 100644
--- a/drivers/net/ice/ice_rxtx.c
+++ b/drivers/net/ice/ice_rxtx.c
@@ -3690,9 +3690,23 @@ ice_prep_pkts(__rte_unused void *tx_queue, struct 
rte_mbuf **tx_pkts,
m = tx_pkts[i];
ol_flags = m->ol_flags;
 
-   if (ol_flags & RTE_MBUF_F_TX_TCP_SEG &&
+   if (!(ol_flags & RTE_MBUF_F_TX_TCP_SEG) &&
+   /**
+* No TSO case: nb->segs, pkt_len to not exceed
+* the limites.
+*/
+   (m->nb_segs > ICE_TX_MTU_SEG_MAX ||
+m->pkt_len > ICE_FRAME_SIZE_MAX)) {
+   rte_errno = EINVAL;
+   return i;
+   } else if (ol_flags & RTE_MBUF_F_TX_TCP_SEG &&
+   /** TSO case: tso_segsz, nb_segs, pkt_len not exceed
+* the limits.
+*/
(m->tso_segsz < ICE_MIN_TSO_MSS ||
 m->tso_segsz > ICE_MAX_TSO_MSS ||
+m->nb_segs >
+   ((struct ice_tx_queue *)tx_queue)->nb_tx_desc ||
 m->pkt_len > ICE_MAX_TSO_FRAME_SIZE)) {
/**
 * MSS outside the range are considered malicious
diff --git a/drivers/net/ice/ice_rxtx.h b/drivers/net/ice/ice_rxtx.h
index 268289716e..bd2c4abec9 100644
--- a/drivers/net/ice/ice_rxtx.h
+++ b/drivers/net/ice/ice_rxtx.h
@@ -56,6 +56,8 @@ extern int ice_timestamp_dynfield_offset;
 
 #define ICE_HEADER_SPLIT_ENA   BIT(0)
 
+#define ICE_TX_MTU_SEG_MAX 8
+
 typedef void (*ice_rx_release_mbufs_t)(struct ice_rx_queue *rxq);
 typedef void (*ice_tx_release_mbufs_t)(struct ice_tx_queue *txq);
 typedef void (*ice_rxd_to_pkt_fields_t)(struct ice_rx_queue *rxq,
-- 
2.31.1



Re: [PATCH v3 0/3] introduce maximum Rx buffer size

2023-11-01 Thread lihuisong (C)



在 2023/11/2 0:08, Stephen Hemminger 写道:

On Wed, 1 Nov 2023 10:36:07 +0800
"lihuisong (C)"  wrote:


Do we need to report this size? It's a common feature for all PMDs.
It would make sense then to have max_rx_bufsize set to 16K by default
in ethdev, and PMD could then raise/lower based on hardware.

It is not appropriate to set to 16K by default in ethdev layer.
Because I don't see any check for the upper bound in some driver, like
axgbe, enetc and so on.
I'm not sure if they have no upper bound.
And some driver's maximum buffer size is "16384(16K) - 128"
So it's better to set to UINT32_MAX by default.
what do you think?

The goal is always giving application a working upper bound, and enforcing
that as much as possible in ethdev layer. It doesnt matter which pattern
does that.  Fortunately, telling application an incorrect answer is not fatal.
If over estimated, application pool would be wasting space.
If under estimated, application will get more fragmented packets.

I know what you mean.
If we set UINT32_MAX, it just means that driver don't report this upper 
bound.
This is also a very common way of handling. And it has no effect on the 
drivers that doesn't report this value.
On the contrary, if we set a default value (like 16K) in ethdev, user 
may be misunderstood and confused by that, right?
After all, this isn't the real upper bound of all drivers. And this 
fixed default value may affect the behavior of some driver that I didn't 
find their upper bound.

So I'd like to keep it as UINT32_MAX.


.


Re: [PATCH] test/dma: fix for buffer auto free

2023-11-01 Thread fengchengwen
Hi Amit,

  I prefer not use static variable to control it because it introduce many 
coupling.

  Suggest add one function which prepare the test_m2d_auto_free, like 
prepare_m2d_auto_free

if ((info.dev_capa & RTE_DMA_CAPA_M2D_AUTO_FREE) &&
dma_add_test[TEST_M2D_AUTO_FREE].enabled == true) {
if (prepare_m2d_auto_free(dev_id) != 0)
goto err;
if (runtest("m2d_auto_free", test_m2d_auto_free, 128, dev_id, 
vchan,
CHECK_ERRS) < 0)
goto err;
}   

In the new function, could do reinit vchan just like the beginging 
test_m2d_auto_free.
static int prepare_m2d_auto_free(int dev_id) {
const struct rte_dma_vchan_conf qconf = {
.direction = RTE_DMA_DIR_MEM_TO_DEV,
.nb_desc = TEST_RINGSIZE,
.auto_free.m2d.pool = pool,
.dst_port.port_type = RTE_DMA_PORT_PCIE,
.dst_port.pcie.coreid = 0,
};
/* Stop the device to reconfigure vchan because should use Mem-to-Dev 
mode. */
if (rte_dma_stop(dev_id) < 0)
ERR_RETURN("Error stopping device %u\n", dev_id);
if (rte_dma_vchan_setup(dev_id, vchan, &qconf) < 0)
ERR_RETURN("Error with queue configuration\n");
if (rte_dma_start(dev_id) != 0)
ERR_RETURN("Error with rte_dma_start()\n");
return 0;
}



Also I found a bug in test_m2d_auto_free function, if above path taken:
if (rte_pktmbuf_alloc_bulk(pool, src, NR_MBUF) != 0) {
printf("alloc src mbufs failed.\n");
ret = -1;
goto done;
}

done:
rte_pktmbuf_free_bulk(dst, NR_MBUF);
/* If the test passes source buffer will be freed in hardware. */
if (ret < 0)
rte_pktmbuf_free_bulk(&src[nb_done], (NR_MBUF - nb_done));
- then it will free invalid mbuf to pool because src was 
not success init


On 2023/11/1 18:18, Amit Prakash Shukla wrote:
> Buffer auto free test failed for more than 1 dma device as the device
> initialization for the test was been done only for the first dma device.
> This changeset fixes the same.
> 
> Fixes: 877cb3e37426 ("dmadev: add buffer auto free offload")
> 
> Signed-off-by: Amit Prakash Shukla 
> ---
>  app/test/test_dmadev.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/app/test/test_dmadev.c b/app/test/test_dmadev.c
> index 216f84b6bb..3d4cb37ee6 100644
> --- a/app/test/test_dmadev.c
> +++ b/app/test/test_dmadev.c
> @@ -49,6 +49,8 @@ struct dma_add_test dma_add_test[] = {
>   [TEST_M2D_AUTO_FREE] = {.name = "m2d_auto_free", .enabled = false},
>  };
>  
> +static bool dev_init;
> +
>  static void
>  __rte_format_printf(3, 4)
>  print_err(const char *func, int lineno, const char *format, ...)
> @@ -837,7 +839,6 @@ test_m2d_auto_free(int16_t dev_id, uint16_t vchan)
>   };
>   uint32_t buf_cnt1, buf_cnt2;
>   struct rte_mempool_ops *ops;
> - static bool dev_init;
>   uint16_t nb_done = 0;
>   bool dma_err = false;
>   int retry = 100;
> @@ -1011,6 +1012,7 @@ test_dmadev_instance(int16_t dev_id)
>  
>   if ((info.dev_capa & RTE_DMA_CAPA_M2D_AUTO_FREE) &&
>   dma_add_test[TEST_M2D_AUTO_FREE].enabled == true) {
> + dev_init = false;
>   if (runtest("m2d_auto_free", test_m2d_auto_free, 128, dev_id, 
> vchan,
>   CHECK_ERRS) < 0)
>   goto err;
> 


Re: [PATCH 3/3] GSG Section 2: Install and Build DPDK - Updated based on feedback

2023-11-01 Thread Tyler Retzlaff
On Tue, Oct 31, 2023 at 08:49:28PM -0400, David Young wrote:
> Merged windows_install_build.rst into building_from_sources.rst

[...]

> -Download the DPDK source code from the official repository 
> -``https://fast.dpdk.org/rel/``.
> +Windows System Requirements
> +^^^
>  
> -Use ``wget`` to grab the DPDK version::
> +Building the DPDK and its applications on Windows requires one of the 
> following
> +environments:
>  
> -wget https://fast.dpdk.org/rel/dpdk-.tar.xz
> +- The Clang-LLVM C compiler and Microsoft MSVC linker.
> +- The MinGW-w64 toolchain (either native or cross).
>  
> -Extract the downloaded archive:
> +The Meson Build system is used to prepare the sources for compilation with 
> the Ninja backend.
>  
> -.. code-block:: bash
> +Option 1: Clang-LLVM C Compiler and Microsoft MSVC Linker
> +"
> +
> +1. Install the Compiler: Download and install the Clang compiler from the 
> +   `LLVM website `_.
> +
> +2. Install the Linker: Download and install the Build Tools for Visual 
> Studio from the
> +   `Microsoft website `_.
> +   When installing build tools, select the “Visual C++ build tools” option 
> and make sure
> +   the Windows SDK is selected.
>  
> -   tar -xvf dpdk-.tar.gz
> +Option 2: MinGW-w64 Toolchain
> +""
>  
> -Navigate to the DPDK directory:
> +1. On Linux (for cross-compilation): Install MinGW-w64 via a package 
> manager. 
> +   Version 4.0.4 for Ubuntu 16.04 cannot be used due to a MinGW-w64 bug.
> +
> +2. On Windows: Obtain the latest version installer from the
> +   `MinGW-w64 repository `_. 
> +   Any thread model (POSIX or Win32) can be chosen, DPDK does not rely on 
> it. 
> +   Install to a folder without spaces in its name, like ``C:\MinGW``. 
> +   This path is assumed for the rest of this guide.
> +
> +Install the Build System
> +
> +
> +Download and install the build system from the
> +`Meson website 
> `_.
> +A good option to choose is the MSI installer for both meson and ninja 
> together.
> +Recommended version is either Meson 0.57.0 (baseline) or the latest release.
   ^^^ maybe use the word 'Required'

We need to be explicit for Windows. *Only* Meson 0.57.x can be used the
latest release of meson is not currently supported.

> +
> +Getting the DPDK Source
> +---
> +
> +Linux and FreeBSD
> +^
>  
>  .. code-block:: bash
>  
> +   wget https://fast.dpdk.org/rel/dpdk-.tar.xz
> +   tar -xvf dpdk-.tar.xz
> cd dpdk-
>  
> +Windows
> +^^^
> +
> +Download the DPDK source code from `DPDK's official website 
> `_ or clone the repository using a Git client. Extract 
> the downloaded archive, if applicable, and navigate to the DPDK directory.
> +
> +Navigate to the directory where the DPDK source code is located:
> +
> +.. code-block:: bash
> +
> +   cd C:\path\to\dpdk-
> +
>  Building DPDK
>  -
>  
> -Configure the build based on your needs, hardware, and environment. 
> -This might include setting specific flags or options. For example: “meson 
> setup -Dbuildtype=debugoptimized build”. Then compile using “ninja” and 
> install using “meson install”.
> +Linux and FreeBSD
> +^
>  
>  .. code-block:: bash
>  
> +   meson build
> ninja -C build
> -   cd build
> -   sudo ninja install
> -   ldconfig
>  
> -For detailed information on Meson build configuration options specific to 
> DPDK, see :ref:`DPDK Meson Build Configuration Options 
> `.
> +Windows
> +^^^
> +
> +**Option 1: Using Clang-LLVM**
> +
> +.. code-block:: bash
> +
> +   set CC=clang
> +   meson setup -Dexamples=helloworld build
> +   meson compile -C build
> +
> +**Option 2: Using MinGW-w64**
> +
> +.. code-block:: bash
> +
> +   set PATH=C:\MinGW\mingw64\bin;%PATH%
> +   meson setup -Dexamples=helloworld build
> +   meson compile -C build
> +
> +.. note::
> +
> +   For detailed information on Meson build configuration options specific to 
> DPDK, see :ref:`DPDK Meson Build Configuration Options 
> `.

This looks good, thanks for the updates.

With the one minor correction suggested above.

Acked-by: Tyler Retzlaff 


[PATCH 00/11] Add the support of multiple PF

2023-11-01 Thread Chaoyong He
Up to now, the NFP card using only one PF (or BDF) for multiple physical
ports, this force the PMD import the difference logic for 'PF' and
'physical port'. Which is not easy to understand and also not compatible
with some DPDK applications.
This patch series add the support of multiple PF, which will remove this
complexity by make sure one 'PF' for one 'physical port' with the help of
firmware.

Chaoyong He (1):
  net/nfp: refactor the probe logic of the secondary process

Peng Zhang (9):
  net/nfp: fix the failure to initialize the LSC mask
  net/nfp: add flag to indicate multiple PFs support
  net/nfp: add major version to nsp commands
  net/nfp: adjust physical port check for multiple PFs
  net/nfp: add the check about the firmware load
  net/nfp: add PF ID used to format symbols
  net/nfp: add nsp command to check if firmware is loaded
  net/nfp: introduce keepalive mechanism for multiple PF
  drivers: enable multiple PF in application firmware

Shihong Wang (1):
  net/nfp: fix the DMA error caused by app exit abnormally

 drivers/common/nfp/nfp_common_ctrl.h   |   1 +
 drivers/net/nfp/flower/nfp_flower.c|   4 +-
 drivers/net/nfp/flower/nfp_flower.h|   2 +-
 drivers/net/nfp/nfp_ethdev.c   | 460 ++---
 drivers/net/nfp/nfp_ethdev_vf.c|   2 +
 drivers/net/nfp/nfp_net_common.c   |   2 +-
 drivers/net/nfp/nfp_net_common.h   |  28 ++
 drivers/net/nfp/nfpcore/nfp_nsp.c  |  24 +-
 drivers/net/nfp/nfpcore/nfp_nsp.h  |   1 +
 drivers/net/nfp/nfpcore/nfp_resource.h |   3 +
 10 files changed, 464 insertions(+), 63 deletions(-)

-- 
2.39.1



[PATCH 01/11] net/nfp: refactor the probe logic of the secondary process

2023-11-01 Thread Chaoyong He
The probe logic of the secondary process of PF PMD now is not very
similarly with the logic of the primary process, which cause we need two
different logics when we add new feature in some case.

Refactor the probe logic of the secondary process to solve this problem.

Signed-off-by: Chaoyong He 
Signed-off-by: Peng Zhang 
Reviewed-by: Long Wu 
---
 drivers/net/nfp/flower/nfp_flower.c |  4 +--
 drivers/net/nfp/flower/nfp_flower.h |  2 +-
 drivers/net/nfp/nfp_ethdev.c| 42 ++---
 3 files changed, 35 insertions(+), 13 deletions(-)

diff --git a/drivers/net/nfp/flower/nfp_flower.c 
b/drivers/net/nfp/flower/nfp_flower.c
index f2e6eb6a6f..6b523d98b0 100644
--- a/drivers/net/nfp/flower/nfp_flower.c
+++ b/drivers/net/nfp/flower/nfp_flower.c
@@ -859,7 +859,7 @@ nfp_init_app_fw_flower(struct nfp_pf_dev *pf_dev,
 }
 
 int
-nfp_secondary_init_app_fw_flower(struct nfp_cpp *cpp)
+nfp_secondary_init_app_fw_flower(struct nfp_pf_dev *pf_dev)
 {
struct rte_eth_dev *eth_dev;
const char *port_name = "pf_vnic_eth_dev";
@@ -872,7 +872,7 @@ nfp_secondary_init_app_fw_flower(struct nfp_cpp *cpp)
return -ENODEV;
}
 
-   eth_dev->process_private = cpp;
+   eth_dev->process_private = pf_dev->cpp;
eth_dev->dev_ops = &nfp_flower_pf_vnic_ops;
eth_dev->rx_pkt_burst = nfp_net_recv_pkts;
eth_dev->tx_pkt_burst = nfp_flower_pf_xmit_pkts;
diff --git a/drivers/net/nfp/flower/nfp_flower.h 
b/drivers/net/nfp/flower/nfp_flower.h
index 220b714018..6f27c06acc 100644
--- a/drivers/net/nfp/flower/nfp_flower.h
+++ b/drivers/net/nfp/flower/nfp_flower.h
@@ -106,7 +106,7 @@ nfp_flower_support_decap_v2(const struct nfp_app_fw_flower 
*app_fw_flower)
 
 int nfp_init_app_fw_flower(struct nfp_pf_dev *pf_dev,
const struct nfp_dev_info *dev_info);
-int nfp_secondary_init_app_fw_flower(struct nfp_cpp *cpp);
+int nfp_secondary_init_app_fw_flower(struct nfp_pf_dev *pf_dev);
 bool nfp_flower_pf_dispatch_pkts(struct nfp_net_hw *hw,
struct rte_mbuf *mbuf,
uint32_t port_id);
diff --git a/drivers/net/nfp/nfp_ethdev.c b/drivers/net/nfp/nfp_ethdev.c
index 4fae2e5540..705465046c 100644
--- a/drivers/net/nfp/nfp_ethdev.c
+++ b/drivers/net/nfp/nfp_ethdev.c
@@ -1006,9 +1006,7 @@ nfp_pf_init(struct rte_pci_device *pci_dev)
 }
 
 static int
-nfp_secondary_init_app_fw_nic(struct rte_pci_device *pci_dev,
-   struct nfp_rtsym_table *sym_tbl,
-   struct nfp_cpp *cpp)
+nfp_secondary_init_app_fw_nic(struct nfp_pf_dev *pf_dev)
 {
uint32_t i;
int err = 0;
@@ -1017,7 +1015,7 @@ nfp_secondary_init_app_fw_nic(struct rte_pci_device 
*pci_dev,
struct nfp_net_hw *hw;
 
/* Read the number of vNIC's created for the PF */
-   total_vnics = nfp_rtsym_read_le(sym_tbl, "nfd_cfg_pf0_num_ports", &err);
+   total_vnics = nfp_rtsym_read_le(pf_dev->sym_tbl, 
"nfd_cfg_pf0_num_ports", &err);
if (err != 0 || total_vnics == 0 || total_vnics > 8) {
PMD_INIT_LOG(ERR, "nfd_cfg_pf0_num_ports symbol with wrong 
value");
return -ENODEV;
@@ -1027,7 +1025,7 @@ nfp_secondary_init_app_fw_nic(struct rte_pci_device 
*pci_dev,
struct rte_eth_dev *eth_dev;
char port_name[RTE_ETH_NAME_MAX_LEN];
snprintf(port_name, sizeof(port_name), "%s_port%u",
-   pci_dev->device.name, i);
+   pf_dev->pci_dev->device.name, i);
 
PMD_INIT_LOG(DEBUG, "Secondary attaching to port %s", 
port_name);
eth_dev = rte_eth_dev_attach_secondary(port_name);
@@ -1037,7 +1035,7 @@ nfp_secondary_init_app_fw_nic(struct rte_pci_device 
*pci_dev,
break;
}
 
-   eth_dev->process_private = cpp;
+   eth_dev->process_private = pf_dev->cpp;
hw = eth_dev->data->dev_private;
nfp_net_ethdev_ops_mount(hw, eth_dev);
 
@@ -1057,7 +1055,9 @@ nfp_pf_secondary_init(struct rte_pci_device *pci_dev)
 {
int ret = 0;
struct nfp_cpp *cpp;
+   struct nfp_pf_dev *pf_dev;
enum nfp_app_fw_id app_fw_id;
+   char name[RTE_ETH_NAME_MAX_LEN];
struct nfp_rtsym_table *sym_tbl;
const struct nfp_dev_info *dev_info;
 
@@ -1075,6 +1075,14 @@ nfp_pf_secondary_init(struct rte_pci_device *pci_dev)
return -ENODEV;
}
 
+   /* Allocate memory for the PF "device" */
+   snprintf(name, sizeof(name), "nfp_pf%d", 0);
+   pf_dev = rte_zmalloc(name, sizeof(*pf_dev), 0);
+   if (pf_dev == NULL) {
+   PMD_INIT_LOG(ERR, "Can't allocate memory for the PF device");
+   return -ENOMEM;
+   }
+
/*
 * When device bound to UIO, the device could be used, by mistake,
 * by two DPDK apps, and the UIO driver does not avoid it. This
@@ -1089,7 +1097,8 @@ nfp_pf_secondary_init(struct rte_pci_devi

[PATCH 02/11] net/nfp: fix the failure to initialize the LSC mask

2023-11-01 Thread Chaoyong He
From: Peng Zhang 

In rare cases, when DPDK application exit, the interrupt handler was not
processed the interrupt in time, resulting in the LSC interrupt mask bit
not being cleared. So when the DPDK application start again, the newly
coming LSC interrupts cannot be received and processed properly.

Fix this problem by force clear the LSC interrupt mask on port
initialization.

Fixes: 6c53f87b3497 ("nfp: add link status interrupt")
Cc: sta...@dpdk.org

Signed-off-by: Shihong Wang 
Signed-off-by: Peng Zhang 
Reviewed-by: Chaoyong He 
---
 drivers/net/nfp/nfp_ethdev.c | 2 ++
 drivers/net/nfp/nfp_ethdev_vf.c  | 2 ++
 drivers/net/nfp/nfp_net_common.c | 2 +-
 drivers/net/nfp/nfp_net_common.h | 1 +
 4 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/net/nfp/nfp_ethdev.c b/drivers/net/nfp/nfp_ethdev.c
index 705465046c..abaf31e27b 100644
--- a/drivers/net/nfp/nfp_ethdev.c
+++ b/drivers/net/nfp/nfp_ethdev.c
@@ -590,6 +590,8 @@ nfp_net_init(struct rte_eth_dev *eth_dev)
nfp_net_dev_interrupt_handler, (void *)eth_dev);
/* Telling the firmware about the LSC interrupt entry */
nn_cfg_writeb(hw, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX);
+   /* Unmask the LSC interrupt */
+   nfp_net_irq_unmask(eth_dev);
/* Recording current stats counters values */
nfp_net_stats_reset(eth_dev);
 
diff --git a/drivers/net/nfp/nfp_ethdev_vf.c b/drivers/net/nfp/nfp_ethdev_vf.c
index f3aa649054..cc345e9218 100644
--- a/drivers/net/nfp/nfp_ethdev_vf.c
+++ b/drivers/net/nfp/nfp_ethdev_vf.c
@@ -351,6 +351,8 @@ nfp_netvf_init(struct rte_eth_dev *eth_dev)
nfp_net_dev_interrupt_handler, (void *)eth_dev);
/* Telling the firmware about the LSC interrupt entry */
nn_cfg_writeb(hw, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX);
+   /* Unmask the LSC interrupt */
+   nfp_net_irq_unmask(eth_dev);
/* Recording current stats counters values */
nfp_net_stats_reset(eth_dev);
 
diff --git a/drivers/net/nfp/nfp_net_common.c b/drivers/net/nfp/nfp_net_common.c
index 4efcdff76f..f8ef049a42 100644
--- a/drivers/net/nfp/nfp_net_common.c
+++ b/drivers/net/nfp/nfp_net_common.c
@@ -1319,7 +1319,7 @@ nfp_net_dev_link_status_print(struct rte_eth_dev *dev)
  * If MSI-X auto-masking is enabled clear the mask bit, otherwise
  * clear the ICR for the entry.
  */
-static void
+void
 nfp_net_irq_unmask(struct rte_eth_dev *dev)
 {
struct nfp_net_hw *hw;
diff --git a/drivers/net/nfp/nfp_net_common.h b/drivers/net/nfp/nfp_net_common.h
index 1f9001c81d..b9df2fe563 100644
--- a/drivers/net/nfp/nfp_net_common.h
+++ b/drivers/net/nfp/nfp_net_common.h
@@ -205,6 +205,7 @@ int nfp_rx_queue_intr_enable(struct rte_eth_dev *dev, 
uint16_t queue_id);
 int nfp_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id);
 void nfp_net_params_setup(struct nfp_net_hw *hw);
 void nfp_net_cfg_queue_setup(struct nfp_net_hw *hw);
+void nfp_net_irq_unmask(struct rte_eth_dev *dev);
 void nfp_net_dev_interrupt_handler(void *param);
 void nfp_net_dev_interrupt_delayed_handler(void *param);
 int nfp_net_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu);
-- 
2.39.1



[PATCH 03/11] net/nfp: fix the DMA error caused by app exit abnormally

2023-11-01 Thread Chaoyong He
From: Shihong Wang 

When DPDK application exit abnormally, there might have DMA error,
and which will cause the load of firmware failed.

Fix this by force the physical port down to clear the possible DMA error.

Fixes: 896c265ef954 ("net/nfp: use new CPP interface")
Cc: sta...@dpdk.org

Signed-off-by: Shihong Wang 
Signed-off-by: Peng Zhang 
Reviewed-by: Chaoyong He 
---
 drivers/net/nfp/nfp_ethdev.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/nfp/nfp_ethdev.c b/drivers/net/nfp/nfp_ethdev.c
index abaf31e27b..aa2b59af32 100644
--- a/drivers/net/nfp/nfp_ethdev.c
+++ b/drivers/net/nfp/nfp_ethdev.c
@@ -847,6 +847,7 @@ nfp_init_app_fw_nic(struct nfp_pf_dev *pf_dev,
 static int
 nfp_pf_init(struct rte_pci_device *pci_dev)
 {
+   uint32_t i;
int ret = 0;
uint64_t addr;
uint32_t cpp_id;
@@ -905,6 +906,10 @@ nfp_pf_init(struct rte_pci_device *pci_dev)
goto hwinfo_cleanup;
}
 
+   /* Force the physical port down to clear the possible DMA error */
+   for (i = 0; i < nfp_eth_table->count; i++)
+   nfp_eth_set_configured(cpp, nfp_eth_table->ports[i].index, 0);
+
if (nfp_fw_setup(pci_dev, cpp, nfp_eth_table, hwinfo) != 0) {
PMD_INIT_LOG(ERR, "Error when uploading firmware");
ret = -EIO;
-- 
2.39.1



[PATCH 04/11] net/nfp: add flag to indicate multiple PFs support

2023-11-01 Thread Chaoyong He
From: Peng Zhang 

Support for multiple PFs have been added to the NFP3800 firmware. This
can be detected by reading the NSP major version, which was bumped to 1
when support was added.

Add a flag and detecting method to record if the current device is
cabable to support multiple PFs. This will be used in later patches to
initialize and make use of this new feature.

Noteworthy about the detection method from NSP version information, the
NSP minor version was not touched when increasing the major version.
This makes the first NSP version to support multiple PFs version 1.8,
while the latest version without this supports remains 0.8.

Signed-off-by: Peng Zhang 
Reviewed-by: Chaoyong He 
Reviewed-by: Long Wu 
---
 drivers/net/nfp/nfp_ethdev.c  | 49 +++
 drivers/net/nfp/nfp_net_common.h  |  8 +
 drivers/net/nfp/nfpcore/nfp_nsp.c | 14 +++--
 3 files changed, 56 insertions(+), 15 deletions(-)

diff --git a/drivers/net/nfp/nfp_ethdev.c b/drivers/net/nfp/nfp_ethdev.c
index aa2b59af32..7022ef435f 100644
--- a/drivers/net/nfp/nfp_ethdev.c
+++ b/drivers/net/nfp/nfp_ethdev.c
@@ -479,7 +479,7 @@ nfp_net_init(struct rte_eth_dev *eth_dev)
 
rte_eth_copy_pci_info(eth_dev, pci_dev);
 
-   if (port == 0) {
+   if (port == 0 || pf_dev->multi_pf.enabled) {
uint32_t min_size;
 
hw->ctrl_bar = pf_dev->ctrl_bar;
@@ -712,6 +712,26 @@ nfp_fw_setup(struct rte_pci_device *dev,
return err;
 }
 
+static inline bool
+nfp_check_multi_pf_from_nsp(struct rte_pci_device *pci_dev,
+   struct nfp_cpp *cpp)
+{
+   bool flag;
+   struct nfp_nsp *nsp;
+
+   nsp = nfp_nsp_open(cpp);
+   if (nsp == NULL) {
+   PMD_DRV_LOG(ERR, "NFP error when obtaining NSP handle");
+   return false;
+   }
+
+   flag = (nfp_nsp_get_abi_ver_major(nsp) > 0) &&
+   (pci_dev->id.device_id == PCI_DEVICE_ID_NFP3800_PF_NIC);
+
+   nfp_nsp_close(nsp);
+   return flag;
+}
+
 static int
 nfp_init_app_fw_nic(struct nfp_pf_dev *pf_dev,
const struct nfp_dev_info *dev_info)
@@ -874,6 +894,14 @@ nfp_pf_init(struct rte_pci_device *pci_dev)
return -ENODEV;
}
 
+   /* Allocate memory for the PF "device" */
+   snprintf(name, sizeof(name), "nfp_pf%d", 0);
+   pf_dev = rte_zmalloc(name, sizeof(*pf_dev), 0);
+   if (pf_dev == NULL) {
+   PMD_INIT_LOG(ERR, "Can't allocate memory for the PF device");
+   return -ENOMEM;
+   }
+
/*
 * When device bound to UIO, the device could be used, by mistake,
 * by two DPDK apps, and the UIO driver does not avoid it. This
@@ -888,7 +916,8 @@ nfp_pf_init(struct rte_pci_device *pci_dev)
 
if (cpp == NULL) {
PMD_INIT_LOG(ERR, "A CPP handle can not be obtained");
-   return -EIO;
+   ret = -EIO;
+   goto pf_cleanup;
}
 
hwinfo = nfp_hwinfo_read(cpp);
@@ -906,6 +935,8 @@ nfp_pf_init(struct rte_pci_device *pci_dev)
goto hwinfo_cleanup;
}
 
+   pf_dev->multi_pf.enabled = nfp_check_multi_pf_from_nsp(pci_dev, cpp);
+
/* Force the physical port down to clear the possible DMA error */
for (i = 0; i < nfp_eth_table->count; i++)
nfp_eth_set_configured(cpp, nfp_eth_table->ports[i].index, 0);
@@ -932,14 +963,6 @@ nfp_pf_init(struct rte_pci_device *pci_dev)
goto sym_tbl_cleanup;
}
 
-   /* Allocate memory for the PF "device" */
-   snprintf(name, sizeof(name), "nfp_pf%d", 0);
-   pf_dev = rte_zmalloc(name, sizeof(*pf_dev), 0);
-   if (pf_dev == NULL) {
-   ret = -ENOMEM;
-   goto sym_tbl_cleanup;
-   }
-
/* Populate the newly created PF device */
pf_dev->app_fw_id = app_fw_id;
pf_dev->cpp = cpp;
@@ -957,7 +980,7 @@ nfp_pf_init(struct rte_pci_device *pci_dev)
if (pf_dev->qc_bar == NULL) {
PMD_INIT_LOG(ERR, "nfp_rtsym_map fails for net.qc");
ret = -EIO;
-   goto pf_cleanup;
+   goto sym_tbl_cleanup;
}
 
PMD_INIT_LOG(DEBUG, "qc_bar address: %p", pf_dev->qc_bar);
@@ -998,8 +1021,6 @@ nfp_pf_init(struct rte_pci_device *pci_dev)
 
 hwqueues_cleanup:
nfp_cpp_area_free(pf_dev->qc_area);
-pf_cleanup:
-   rte_free(pf_dev);
 sym_tbl_cleanup:
free(sym_tbl);
 eth_table_cleanup:
@@ -1008,6 +1029,8 @@ nfp_pf_init(struct rte_pci_device *pci_dev)
free(hwinfo);
 cpp_cleanup:
nfp_cpp_free(cpp);
+pf_cleanup:
+   rte_free(pf_dev);
 
return ret;
 }
diff --git a/drivers/net/nfp/nfp_net_common.h b/drivers/net/nfp/nfp_net_common.h
index b9df2fe563..bd0ed077c5 100644
--- a/drivers/net/nfp/nfp_net_common.h
+++ b/drivers/net/nfp/nfp_net_common.h
@@ -54,6 +54,11 @@ struct nfp_net_tlv_caps {
uint32_t mbox_cmsg_types;/**< Cmsgs which can be 

[PATCH 05/11] net/nfp: add major version to nsp commands

2023-11-01 Thread Chaoyong He
From: Peng Zhang 

The commands sent to the NSP take the NSP major version into account. Up
until now only NSP major version 0 have been supported and the value
have been hard-coded to 0.

In preparation to add support for both NSP version 0.x and 1.x, extend
the command to take the running NSP version into account.

Signed-off-by: Peng Zhang 
Reviewed-by: Chaoyong He 
Reviewed-by: Long Wu 
---
 drivers/net/nfp/nfpcore/nfp_nsp.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/nfp/nfpcore/nfp_nsp.c 
b/drivers/net/nfp/nfpcore/nfp_nsp.c
index 9f88b822f3..589d878e0d 100644
--- a/drivers/net/nfp/nfpcore/nfp_nsp.c
+++ b/drivers/net/nfp/nfpcore/nfp_nsp.c
@@ -22,7 +22,8 @@
 
 #define NSP_COMMAND 0x08
 #define   NSP_COMMAND_OPTIONGENMASK_ULL(63, 32)
-#define   NSP_COMMAND_CODE  GENMASK_ULL(31, 16)
+#define   NSP_COMMAND_VER_MAJOR GENMASK_ULL(31, 28)
+#define   NSP_COMMAND_CODE  GENMASK_ULL(27, 16)
 #define   NSP_COMMAND_DMA_BUF   RTE_BIT64(1)
 #define   NSP_COMMAND_START RTE_BIT64(0)
 
@@ -370,6 +371,7 @@ nfp_nsp_command_real(struct nfp_nsp *state,
 
err = nfp_cpp_writeq(cpp, nsp_cpp, nsp_command,
FIELD_PREP(NSP_COMMAND_OPTION, arg->option) |
+   FIELD_PREP(NSP_COMMAND_VER_MAJOR, state->ver.major) |
FIELD_PREP(NSP_COMMAND_CODE, arg->code) |
FIELD_PREP(NSP_COMMAND_DMA_BUF, arg->dma) |
FIELD_PREP(NSP_COMMAND_START, 1));
-- 
2.39.1



[PATCH 06/11] net/nfp: adjust physical port check for multiple PFs

2023-11-01 Thread Chaoyong He
From: Peng Zhang 

If the firmware supports multiple PFs each PF is represented by a single
physical port. While if the firmware only supports a single PF there
might be one or more physical ports represented by a single PF.

Adjust the check to handle both single and multiple PFs firmware.

Signed-off-by: Peng Zhang 
Reviewed-by: Chaoyong He 
Reviewed-by: Long Wu 
---
 drivers/net/nfp/nfp_ethdev.c | 33 +
 1 file changed, 25 insertions(+), 8 deletions(-)

diff --git a/drivers/net/nfp/nfp_ethdev.c b/drivers/net/nfp/nfp_ethdev.c
index 7022ef435f..3ebfd444b3 100644
--- a/drivers/net/nfp/nfp_ethdev.c
+++ b/drivers/net/nfp/nfp_ethdev.c
@@ -712,6 +712,15 @@ nfp_fw_setup(struct rte_pci_device *dev,
return err;
 }
 
+static inline bool
+nfp_check_multi_pf_from_fw(uint32_t total_vnics)
+{
+   if (total_vnics == 1)
+   return true;
+
+   return false;
+}
+
 static inline bool
 nfp_check_multi_pf_from_nsp(struct rte_pci_device *pci_dev,
struct nfp_cpp *cpp)
@@ -765,14 +774,22 @@ nfp_init_app_fw_nic(struct nfp_pf_dev *pf_dev,
goto app_cleanup;
}
 
-   /*
-* For coreNIC the number of vNICs exposed should be the same as the
-* number of physical ports.
-*/
-   if (total_vnics != nfp_eth_table->count) {
-   PMD_INIT_LOG(ERR, "Total physical ports do not match number of 
vNICs");
-   ret = -ENODEV;
-   goto app_cleanup;
+   if (pf_dev->multi_pf.enabled) {
+   if (!nfp_check_multi_pf_from_fw(total_vnics)) {
+   PMD_INIT_LOG(ERR, "NSP report multipf, but FW report 
not multipf");
+   ret = -ENODEV;
+   goto app_cleanup;
+   }
+   } else {
+   /*
+* For coreNIC the number of vNICs exposed should be the same 
as the
+* number of physical ports.
+*/
+   if (total_vnics != nfp_eth_table->count) {
+   PMD_INIT_LOG(ERR, "Total physical ports do not match 
number of vNICs");
+   ret = -ENODEV;
+   goto app_cleanup;
+   }
}
 
/* Populate coreNIC app properties */
-- 
2.39.1



[PATCH 07/11] net/nfp: add the check about the firmware load

2023-11-01 Thread Chaoyong He
From: Peng Zhang 

When firmware load failed, it doesn't have any notice.
So add the check about the firmware load and add an exit
point when the firmware load process fail.

Signed-off-by: Peng Zhang 
Reviewed-by: Chaoyong He 
Reviewed-by: Long Wu 
---
 drivers/net/nfp/nfp_ethdev.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/net/nfp/nfp_ethdev.c b/drivers/net/nfp/nfp_ethdev.c
index 3ebfd444b3..9378a2ebc3 100644
--- a/drivers/net/nfp/nfp_ethdev.c
+++ b/drivers/net/nfp/nfp_ethdev.c
@@ -654,7 +654,12 @@ nfp_fw_upload(struct rte_pci_device *dev,
PMD_DRV_LOG(INFO, "Firmware file found at %s with size: %zu",
fw_name, fsize);
PMD_DRV_LOG(INFO, "Uploading the firmware ...");
-   nfp_nsp_load_fw(nsp, fw_buf, fsize);
+   if (nfp_nsp_load_fw(nsp, fw_buf, fsize) < 0) {
+   free(fw_buf);
+   PMD_DRV_LOG(ERR, "Firmware load failed.");
+   return -EIO;
+   }
+
PMD_DRV_LOG(INFO, "Done");
 
free(fw_buf);
-- 
2.39.1



[PATCH 08/11] net/nfp: add PF ID used to format symbols

2023-11-01 Thread Chaoyong He
From: Peng Zhang 

In single PF scenario, the format symbols just is related
with PF ID 0. In multiple PF scenario, the format symbols
should be related with PF ID. So this commit adds the
PF ID used to format symbols.

Signed-off-by: Peng Zhang 
Reviewed-by: Chaoyong He 
Reviewed-by: Long Wu 
---
 drivers/net/nfp/nfp_ethdev.c | 109 ---
 drivers/net/nfp/nfp_net_common.h |   2 +
 2 files changed, 86 insertions(+), 25 deletions(-)

diff --git a/drivers/net/nfp/nfp_ethdev.c b/drivers/net/nfp/nfp_ethdev.c
index 9378a2ebc3..96f0ae3fe3 100644
--- a/drivers/net/nfp/nfp_ethdev.c
+++ b/drivers/net/nfp/nfp_ethdev.c
@@ -224,11 +224,22 @@ nfp_net_set_link_down(struct rte_eth_dev *dev)
return nfp_eth_set_configured(dev->process_private, 
hw->nfp_idx, 0);
 }
 
+static uint8_t
+nfp_function_id_get(const struct nfp_pf_dev *pf_dev,
+   uint8_t phy_port)
+{
+   if (pf_dev->multi_pf.enabled)
+   return pf_dev->multi_pf.function_id;
+
+   return phy_port;
+}
+
 /* Reset and stop device. The device can not be restarted. */
 static int
 nfp_net_close(struct rte_eth_dev *dev)
 {
uint8_t i;
+   uint8_t id;
struct nfp_net_hw *hw;
struct nfp_pf_dev *pf_dev;
struct rte_pci_device *pci_dev;
@@ -264,8 +275,10 @@ nfp_net_close(struct rte_eth_dev *dev)
app_fw_nic->ports[hw->idx] = NULL;
 
for (i = 0; i < app_fw_nic->total_phyports; i++) {
+   id = nfp_function_id_get(pf_dev, i);
+
/* Check to see if ports are still in use */
-   if (app_fw_nic->ports[i] != NULL)
+   if (app_fw_nic->ports[id] != NULL)
return 0;
}
 
@@ -667,6 +680,19 @@ nfp_fw_upload(struct rte_pci_device *dev,
return 0;
 }
 
+static void
+nfp_fw_unload(struct nfp_cpp *cpp)
+{
+   struct nfp_nsp *nsp;
+
+   nsp = nfp_nsp_open(cpp);
+   if (nsp == NULL)
+   return;
+
+   nfp_nsp_device_soft_reset(nsp);
+   nfp_nsp_close(nsp);
+}
+
 static int
 nfp_fw_setup(struct rte_pci_device *dev,
struct nfp_cpp *cpp,
@@ -751,6 +777,7 @@ nfp_init_app_fw_nic(struct nfp_pf_dev *pf_dev,
const struct nfp_dev_info *dev_info)
 {
uint8_t i;
+   uint8_t id;
int ret = 0;
uint32_t total_vnics;
struct nfp_net_hw *hw;
@@ -758,10 +785,13 @@ nfp_init_app_fw_nic(struct nfp_pf_dev *pf_dev,
struct rte_eth_dev *eth_dev;
struct nfp_app_fw_nic *app_fw_nic;
struct nfp_eth_table *nfp_eth_table;
+   char bar_name[RTE_ETH_NAME_MAX_LEN];
char port_name[RTE_ETH_NAME_MAX_LEN];
+   char vnic_name[RTE_ETH_NAME_MAX_LEN];
 
nfp_eth_table = pf_dev->nfp_eth_table;
PMD_INIT_LOG(INFO, "Total physical ports: %d", nfp_eth_table->count);
+   id = nfp_function_id_get(pf_dev, 0);
 
/* Allocate memory for the CoreNIC app */
app_fw_nic = rte_zmalloc("nfp_app_fw_nic", sizeof(*app_fw_nic), 0);
@@ -772,9 +802,10 @@ nfp_init_app_fw_nic(struct nfp_pf_dev *pf_dev,
pf_dev->app_fw_priv = app_fw_nic;
 
/* Read the number of vNIC's created for the PF */
-   total_vnics = nfp_rtsym_read_le(pf_dev->sym_tbl, 
"nfd_cfg_pf0_num_ports", &ret);
+   snprintf(vnic_name, sizeof(vnic_name), "nfd_cfg_pf%u_num_ports", id);
+   total_vnics = nfp_rtsym_read_le(pf_dev->sym_tbl, vnic_name, &ret);
if (ret != 0 || total_vnics == 0 || total_vnics > 8) {
-   PMD_INIT_LOG(ERR, "nfd_cfg_pf0_num_ports symbol with wrong 
value");
+   PMD_INIT_LOG(ERR, "%s symbol with wrong value", vnic_name);
ret = -ENODEV;
goto app_cleanup;
}
@@ -804,11 +835,12 @@ nfp_init_app_fw_nic(struct nfp_pf_dev *pf_dev,
app_fw_nic->multiport = true;
 
/* Map the symbol table */
-   pf_dev->ctrl_bar = nfp_rtsym_map(pf_dev->sym_tbl, "_pf0_net_bar0",
+   snprintf(bar_name, sizeof(bar_name), "_pf%u_net_bar0", id);
+   pf_dev->ctrl_bar = nfp_rtsym_map(pf_dev->sym_tbl, bar_name,
app_fw_nic->total_phyports * NFP_NET_CFG_BAR_SZ,
&pf_dev->ctrl_area);
if (pf_dev->ctrl_bar == NULL) {
-   PMD_INIT_LOG(ERR, "nfp_rtsym_map fails for _pf0_net_ctrl_bar");
+   PMD_INIT_LOG(ERR, "nfp_rtsym_map fails for %s", bar_name);
ret = -EIO;
goto app_cleanup;
}
@@ -818,8 +850,9 @@ nfp_init_app_fw_nic(struct nfp_pf_dev *pf_dev,
/* Loop through all physical ports on PF */
numa_node = rte_socket_id();
for (i = 0; i < app_fw_nic->total_phyports; i++) {
-   snprintf(port_name, sizeof(port_name), "%s_port%d",
-   pf_dev->pci_dev->device.name, i);
+   id = nfp_function_id_get(pf_dev, i);
+   snprintf(port_name, sizeof(port_name), "%s_port%u",
+   pf_dev->pci_dev->de

[PATCH 09/11] net/nfp: add nsp command to check if firmware is loaded

2023-11-01 Thread Chaoyong He
From: Peng Zhang 

Add a NSP command to check if any firmware have been loaded.

Signed-off-by: Peng Zhang 
Reviewed-by: Chaoyong He 
Reviewed-by: Long Wu 
---
 drivers/net/nfp/nfpcore/nfp_nsp.c | 6 ++
 drivers/net/nfp/nfpcore/nfp_nsp.h | 1 +
 2 files changed, 7 insertions(+)

diff --git a/drivers/net/nfp/nfpcore/nfp_nsp.c 
b/drivers/net/nfp/nfpcore/nfp_nsp.c
index 589d878e0d..e5aaef8d55 100644
--- a/drivers/net/nfp/nfpcore/nfp_nsp.c
+++ b/drivers/net/nfp/nfpcore/nfp_nsp.c
@@ -637,6 +637,12 @@ nfp_nsp_load_fw(struct nfp_nsp *state,
return 0;
 }
 
+bool
+nfp_nsp_fw_loaded(struct nfp_nsp *state)
+{
+   return nfp_nsp_command(state, SPCODE_FW_LOADED) > 0;
+}
+
 int
 nfp_nsp_read_eth_table(struct nfp_nsp *state,
void *buf,
diff --git a/drivers/net/nfp/nfpcore/nfp_nsp.h 
b/drivers/net/nfp/nfpcore/nfp_nsp.h
index fe52dffeb7..492fa7e99f 100644
--- a/drivers/net/nfp/nfpcore/nfp_nsp.h
+++ b/drivers/net/nfp/nfpcore/nfp_nsp.h
@@ -212,5 +212,6 @@ enum nfp_nsp_sensor_id {
 
 int nfp_hwmon_read_sensor(struct nfp_cpp *cpp, enum nfp_nsp_sensor_id id,
uint32_t *val);
+bool nfp_nsp_fw_loaded(struct nfp_nsp *state);
 
 #endif /* __NSP_NSP_H__ */
-- 
2.39.1



[PATCH 10/11] net/nfp: introduce keepalive mechanism for multiple PF

2023-11-01 Thread Chaoyong He
From: Peng Zhang 

In multiple PF scenario, management firmware is in charge of
application firmware unloading instead of driver by keepalive
mechanism.

A new NSP resource area is allocated for keepalive use with name
"nfp.beat". Driver sets periodically updates the PFs' corresponding
word in "nfp.beat". Management firmware checks these PF's words to
learn whether and which PF are alive, and will unload the application
firmware if no PF is running.

Signed-off-by: Peng Zhang 
Reviewed-by: Chaoyong He 
Reviewed-by: Long Wu 
---
 drivers/net/nfp/nfp_ethdev.c   | 173 -
 drivers/net/nfp/nfp_net_common.h   |  17 +++
 drivers/net/nfp/nfpcore/nfp_resource.h |   3 +
 3 files changed, 189 insertions(+), 4 deletions(-)

diff --git a/drivers/net/nfp/nfp_ethdev.c b/drivers/net/nfp/nfp_ethdev.c
index 96f0ae3fe3..bbc0109f5f 100644
--- a/drivers/net/nfp/nfp_ethdev.c
+++ b/drivers/net/nfp/nfp_ethdev.c
@@ -5,6 +5,8 @@
  * Small portions derived from code Copyright(c) 2010-2015 Intel Corporation.
  */
 
+#include 
+
 #include 
 #include 
 
@@ -16,6 +18,7 @@
 #include "nfpcore/nfp_rtsym.h"
 #include "nfpcore/nfp_nsp.h"
 #include "nfpcore/nfp6000_pcie.h"
+#include "nfpcore/nfp_resource.h"
 
 #include "nfp_cpp_bridge.h"
 #include "nfp_ipsec.h"
@@ -234,6 +237,79 @@ nfp_function_id_get(const struct nfp_pf_dev *pf_dev,
return phy_port;
 }
 
+static void
+nfp_net_beat_timer(void *arg)
+{
+   uint64_t cur_sec;
+   struct nfp_multi_pf *multi_pf = arg;
+
+   cur_sec = rte_rdtsc();
+   nn_writeq(cur_sec, multi_pf->beat_addr + 
NFP_BEAT_OFFSET(multi_pf->function_id));
+
+   /* Beat once per second. */
+   if (rte_eal_alarm_set(1000 * 1000, nfp_net_beat_timer,
+   (void *)multi_pf) < 0) {
+   PMD_DRV_LOG(ERR, "Error setting alarm");
+   }
+}
+
+static int
+nfp_net_keepalive_init(struct nfp_cpp *cpp,
+   struct nfp_multi_pf *multi_pf)
+{
+   uint8_t *base;
+   uint64_t addr;
+   uint32_t size;
+   uint32_t cpp_id;
+   struct nfp_resource *res;
+
+   res = nfp_resource_acquire(cpp, NFP_RESOURCE_KEEPALIVE);
+   if (res == NULL)
+   return -EIO;
+
+   cpp_id = nfp_resource_cpp_id(res);
+   addr = nfp_resource_address(res);
+   size = nfp_resource_size(res);
+
+   nfp_resource_release(res);
+
+   /* Allocate a fixed area for keepalive. */
+   base = nfp_cpp_map_area(cpp, cpp_id, addr, size, &multi_pf->beat_area);
+   if (base == NULL) {
+   PMD_DRV_LOG(ERR, "Failed to map area for keepalive.");
+   return -EIO;
+   }
+
+   multi_pf->beat_addr = base;
+
+   return 0;
+}
+
+static void
+nfp_net_keepalive_uninit(struct nfp_multi_pf *multi_pf)
+{
+   nfp_cpp_area_release_free(multi_pf->beat_area);
+}
+
+static int
+nfp_net_keepalive_start(struct nfp_multi_pf *multi_pf)
+{
+   if (rte_eal_alarm_set(1000 * 1000, nfp_net_beat_timer,
+   (void *)multi_pf) < 0) {
+   PMD_DRV_LOG(ERR, "Error setting alarm");
+   return -EIO;
+   }
+
+   return 0;
+}
+
+static void
+nfp_net_keepalive_stop(struct nfp_multi_pf *multi_pf)
+{
+   /* Cancel keepalive for multiple PF setup */
+   rte_eal_alarm_cancel(nfp_net_beat_timer, (void *)multi_pf);
+}
+
 /* Reset and stop device. The device can not be restarted. */
 static int
 nfp_net_close(struct rte_eth_dev *dev)
@@ -284,6 +360,10 @@ nfp_net_close(struct rte_eth_dev *dev)
 
/* Now it is safe to free all PF resources */
PMD_INIT_LOG(INFO, "Freeing PF resources");
+   if (pf_dev->multi_pf.enabled) {
+   nfp_net_keepalive_stop(&pf_dev->multi_pf);
+   nfp_net_keepalive_uninit(&pf_dev->multi_pf);
+   }
nfp_cpp_area_free(pf_dev->ctrl_area);
nfp_cpp_area_free(pf_dev->qc_area);
free(pf_dev->hwinfo);
@@ -693,11 +773,92 @@ nfp_fw_unload(struct nfp_cpp *cpp)
nfp_nsp_close(nsp);
 }
 
+static int
+nfp_fw_reload(struct rte_pci_device *dev,
+   struct nfp_nsp *nsp,
+   char *card_desc)
+{
+   int err;
+
+   nfp_nsp_device_soft_reset(nsp);
+   err = nfp_fw_upload(dev, nsp, card_desc);
+   if (err != 0)
+   PMD_DRV_LOG(ERR, "NFP firmware load failed");
+
+   return err;
+}
+
+static int
+nfp_fw_loaded_check_alive(struct rte_pci_device *dev,
+   struct nfp_nsp *nsp,
+   char *card_desc,
+   const struct nfp_dev_info *dev_info,
+   struct nfp_multi_pf *multi_pf)
+{
+   int offset;
+   uint32_t i;
+   uint64_t beat;
+   uint32_t port_num;
+
+   /*
+* If the beats of any other port changed in 3s,
+* we should not reload the firmware.
+*/
+   for (port_num = 0; port_num < dev_info->pf_num_per_unit; port_num++) {
+   if (port_num == multi_pf->function_id)
+   continue;
+
+   offset =

  1   2   >