[v2 1/5] raw/zxdh: introduce zxdh raw device driver

2024-08-12 Thread Yong Zhang
Introduce rawdev driver support for the ZXDH device, which
can help connect two separate hosts with each other.

Signed-off-by: Yong Zhang 
---
 MAINTAINERS                    |   5 +
 doc/guides/rawdevs/index.rst   |   1 +
 doc/guides/rawdevs/zxdh.rst|  30 +
 drivers/raw/meson.build|   1 +
 drivers/raw/zxdh/meson.build   |   5 +
 drivers/raw/zxdh/zxdh_rawdev.c | 220 +
 drivers/raw/zxdh/zxdh_rawdev.h | 118 ++
 7 files changed, 380 insertions(+)
 create mode 100644 doc/guides/rawdevs/zxdh.rst
 create mode 100644 drivers/raw/zxdh/meson.build
 create mode 100644 drivers/raw/zxdh/zxdh_rawdev.c
 create mode 100644 drivers/raw/zxdh/zxdh_rawdev.h

diff --git a/MAINTAINERS b/MAINTAINERS
index c5a703b5c0..6dd4fbae6e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1511,6 +1511,11 @@ M: Gagandeep Singh 
 F: drivers/raw/dpaa2_cmdif/
 F: doc/guides/rawdevs/dpaa2_cmdif.rst
 
+ZXDH
+M: Yong Zhang 
+F: drivers/raw/zxdh/
+F: doc/guides/rawdevs/zxdh.rst
+
 
 Packet processing
 -
diff --git a/doc/guides/rawdevs/index.rst b/doc/guides/rawdevs/index.rst
index f34315f051..d85a4b7148 100644
--- a/doc/guides/rawdevs/index.rst
+++ b/doc/guides/rawdevs/index.rst
@@ -16,3 +16,4 @@ application through rawdev API.
 dpaa2_cmdif
 ifpga
 ntb
+zxdh
diff --git a/doc/guides/rawdevs/zxdh.rst b/doc/guides/rawdevs/zxdh.rst
new file mode 100644
index 00..fa7ada1004
--- /dev/null
+++ b/doc/guides/rawdevs/zxdh.rst
@@ -0,0 +1,30 @@
+..  SPDX-License-Identifier: BSD-3-Clause
+    Copyright 2024 ZTE Corporation
+
+ZXDH Rawdev Driver
+==================
+
+The ``zxdh`` rawdev driver is an implementation of the rawdev API
+that provides communication between two separate hosts.
+This is achieved by using the GDMA controller of the Dinghai SoC,
+which can be configured through the exposed MPF devices.
+
+Device Setup
+------------
+
+It is recommended, though not mandatory, to bind the MPF devices to
+the ZXDH MPF kernel driver.
+The kernel drivers can be downloaded at `ZTE Official Website
+`_.
+
+Initialization
+--------------
+
+The ``zxdh`` rawdev driver must run in IOVA PA mode.
+Pass ``--iova-mode=pa`` in the EAL options.
+
+Platform Requirement
+--------------------
+
+This PMD is only supported on the following ZTE Neo platforms:
+
+- Neo X510/X512
+
diff --git a/drivers/raw/meson.build b/drivers/raw/meson.build
index 05cad143fe..237d1bdd80 100644
--- a/drivers/raw/meson.build
+++ b/drivers/raw/meson.build
@@ -12,5 +12,6 @@ drivers = [
 'ifpga',
 'ntb',
 'skeleton',
+'zxdh',
 ]
 std_deps = ['rawdev']
diff --git a/drivers/raw/zxdh/meson.build b/drivers/raw/zxdh/meson.build
new file mode 100644
index 00..266d3db6d8
--- /dev/null
+++ b/drivers/raw/zxdh/meson.build
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2024 ZTE Corporation
+
+deps += ['rawdev', 'kvargs', 'mbuf', 'bus_pci']
+sources = files('zxdh_rawdev.c')
diff --git a/drivers/raw/zxdh/zxdh_rawdev.c b/drivers/raw/zxdh/zxdh_rawdev.c
new file mode 100644
index 00..269c4f92e0
--- /dev/null
+++ b/drivers/raw/zxdh/zxdh_rawdev.c
@@ -0,0 +1,220 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2024 ZTE Corporation
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "zxdh_rawdev.h"
+
+/* Register offset */
+#define ZXDH_GDMA_BASE_OFFSET   0x10
+
+#define ZXDH_GDMA_CHAN_SHIFT    0x80
+char zxdh_gdma_driver_name[] = "rawdev_zxdh_gdma";
+char dev_name[] = "zxdh_gdma";
+
+uint32_t
+zxdh_gdma_read_reg(struct rte_rawdev *dev, uint16_t queue_id, uint32_t offset)
+{
+   struct zxdh_gdma_rawdev *gdmadev = zxdh_gdma_rawdev_get_priv(dev);
+   uint32_t addr = 0;
+   uint32_t val = 0;
+
+   addr = offset + queue_id * ZXDH_GDMA_CHAN_SHIFT;
+   val = *(uint32_t *)(gdmadev->base_addr + addr);
+
+   return val;
+}
+
+void
+zxdh_gdma_write_reg(struct rte_rawdev *dev, uint16_t queue_id, uint32_t 
offset, uint32_t val)
+{
+   struct zxdh_gdma_rawdev *gdmadev = zxdh_gdma_rawdev_get_priv(dev);
+   uint32_t addr = 0;
+
+   addr = offset + queue_id * ZXDH_GDMA_CHAN_SHIFT;
+   *(uint32_t *)(gdmadev->base_addr + addr) = val;
+}
+
+static const struct rte_rawdev_ops zxdh_gdma_rawdev_ops = {
+};
+
+static int
+zxdh_gdma_map_resource(struct rte_pci_device *dev)
+{
+   int fd = -1;
+   char devname[PATH_MAX];
+   void *mapaddr = NULL;
+   struct rte_pci_addr *loc;
+
+   loc = &dev->addr;
+   snprintf(devname, sizeof(devname), "%s/" PCI_PRI_FMT "/resource0",
+   rte_pci_get_sysfs_path(),
+   loc->domain, loc->bus, loc->devid

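For illustration only: a minimal, hypothetical EAL bring-up matching the
IOVA requirement stated in the driver guide above. rte_eal_init(),
rte_rawdev_count() and rte_eal_cleanup() are standard DPDK API; the
argument list and program structure are invented for this sketch.

#include <stdio.h>

#include <rte_common.h>
#include <rte_eal.h>
#include <rte_rawdev.h>

int
main(void)
{
	/* The zxdh driver documentation above requires IOVA PA mode. */
	char *eal_args[] = { (char *)"app", (char *)"--iova-mode=pa" };

	if (rte_eal_init(RTE_DIM(eal_args), eal_args) < 0) {
		fprintf(stderr, "EAL init failed\n");
		return -1;
	}

	/* Any probed zxdh GDMA device shows up as a raw device. */
	printf("%u raw device(s) probed\n", (unsigned int)rte_rawdev_count());

	rte_eal_cleanup();
	return 0;
}
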
[v2 2/5] raw/zxdh: add support for queue setup operation

2024-08-12 Thread Yong Zhang
Add queue initialization and release interface.

Signed-off-by: Yong Zhang 
---
 drivers/raw/zxdh/zxdh_rawdev.c | 242 +
 drivers/raw/zxdh/zxdh_rawdev.h |  19 +++
 2 files changed, 261 insertions(+)

diff --git a/drivers/raw/zxdh/zxdh_rawdev.c b/drivers/raw/zxdh/zxdh_rawdev.c
index 269c4f92e0..a76e3cda39 100644
--- a/drivers/raw/zxdh/zxdh_rawdev.c
+++ b/drivers/raw/zxdh/zxdh_rawdev.c
@@ -36,13 +36,58 @@
 
 #include "zxdh_rawdev.h"
 
+/*
+ * User field layout:
+ * ep_id-bit[15:12] vfunc_num-bit[11:4] func_num-bit[3:1] vfunc_active-bit0
+ * host ep_id:5~8   zf ep_id:9
+ */
+#define ZXDH_GDMA_ZF_USER   0x9000  /* ep4 pf0 */
+#define ZXDH_GDMA_PF_NUM_SHIFT  1
+#define ZXDH_GDMA_VF_NUM_SHIFT  4
+#define ZXDH_GDMA_EP_ID_SHIFT   12
+#define ZXDH_GDMA_VF_EN 1
+#define ZXDH_GDMA_EPID_OFFSET   5
+
 /* Register offset */
 #define ZXDH_GDMA_BASE_OFFSET   0x10
+#define ZXDH_GDMA_EXT_ADDR_OFFSET   0x218
+#define ZXDH_GDMA_CONTROL_OFFSET0x230
+#define ZXDH_GDMA_TC_CNT_OFFSET 0x23c
+#define ZXDH_GDMA_LLI_USER_OFFSET   0x228
+
+#define ZXDH_GDMA_CHAN_FORCE_CLOSE  (1 << 31)
+
+/* TC count & Error interrupt status register */
+#define ZXDH_GDMA_SRC_LLI_ERR   (1 << 16)
+#define ZXDH_GDMA_SRC_DATA_ERR  (1 << 17)
+#define ZXDH_GDMA_DST_ADDR_ERR  (1 << 18)
+#define ZXDH_GDMA_ERR_STATUS(1 << 19)
+#define ZXDH_GDMA_ERR_INTR_ENABLE   (1 << 20)
+#define ZXDH_GDMA_TC_CNT_CLEAN  (1)
 
 #define ZXDH_GDMA_CHAN_SHIFT    0x80
+#define LOW32_MASK  0xffffffff
+#define LOW16_MASK  0xffff
+
+static int zxdh_gdma_queue_init(struct rte_rawdev *dev, uint16_t queue_id);
+static int zxdh_gdma_queue_free(struct rte_rawdev *dev, uint16_t queue_id);
+
 char zxdh_gdma_driver_name[] = "rawdev_zxdh_gdma";
 char dev_name[] = "zxdh_gdma";
 
+static inline struct zxdh_gdma_queue *
+zxdh_gdma_get_queue(struct rte_rawdev *dev, uint16_t queue_id)
+{
+   struct zxdh_gdma_rawdev *gdmadev = zxdh_gdma_rawdev_get_priv(dev);
+
+   if (queue_id >= ZXDH_GDMA_TOTAL_CHAN_NUM) {
+   ZXDH_PMD_LOG(ERR, "queue id %d is invalid", queue_id);
+   return NULL;
+   }
+
+   return &(gdmadev->vqs[queue_id]);
+}
+
 uint32_t
 zxdh_gdma_read_reg(struct rte_rawdev *dev, uint16_t queue_id, uint32_t offset)
 {
@@ -66,9 +111,206 @@ zxdh_gdma_write_reg(struct rte_rawdev *dev, uint16_t queue_id, uint32_t offset,
*(uint32_t *)(gdmadev->base_addr + addr) = val;
 }
 
+static int
+zxdh_gdma_rawdev_queue_setup(struct rte_rawdev *dev,
+uint16_t queue_id,
+rte_rawdev_obj_t queue_conf,
+size_t conf_size)
+{
+   struct zxdh_gdma_rawdev *gdmadev = NULL;
+   struct zxdh_gdma_queue *queue = NULL;
+   struct zxdh_gdma_queue_config *qconfig = NULL;
+   struct zxdh_gdma_rbp *rbp = NULL;
+   uint16_t i = 0;
+   uint8_t is_txq = 0;
+   uint32_t src_user = 0;
+   uint32_t dst_user = 0;
+
+   if (dev == NULL)
+   return -EINVAL;
+
+   if ((queue_conf == NULL) || (conf_size != sizeof(struct zxdh_gdma_queue_config)))
+   return -EINVAL;
+
+   gdmadev = zxdh_gdma_rawdev_get_priv(dev);
+   qconfig = (struct zxdh_gdma_queue_config *)queue_conf;
+
+   for (i = 0; i < ZXDH_GDMA_TOTAL_CHAN_NUM; i++) {
+   if (gdmadev->vqs[i].enable == 0)
+   break;
+   }
+   if (i >= ZXDH_GDMA_TOTAL_CHAN_NUM) {
+   ZXDH_PMD_LOG(ERR, "Failed to setup queue, no avail queues");
+   return -1;
+   }
+   queue_id = i;
+   if (zxdh_gdma_queue_init(dev, queue_id) != 0) {
+   ZXDH_PMD_LOG(ERR, "Failed to init queue");
+   return -1;
+   }
+   queue = &(gdmadev->vqs[queue_id]);
+
+   rbp = qconfig->rbp;
+   if ((rbp->srbp != 0) && (rbp->drbp == 0)) {
+   is_txq = 0;
+   dst_user = ZXDH_GDMA_ZF_USER;
+   src_user = ((rbp->spfid << ZXDH_GDMA_PF_NUM_SHIFT) |
+   ((rbp->sportid + ZXDH_GDMA_EPID_OFFSET) << ZXDH_GDMA_EP_ID_SHIFT));
+
+   if (rbp->svfid != 0)
+   src_user |= (ZXDH_GDMA_VF_EN |
+((rbp->svfid - 1) << ZXDH_GDMA_VF_NUM_SHIFT));
+
+   ZXDH_PMD_LOG(DEBUG, "rxq->qidx:%d setup src_user(ep:%d pf:%d vf:%d) success",
+   queue_id, (uint8_t)rbp->sportid, (uint8_t)rbp->spfid,
+   (uint8_t)rbp->svfid);
+   } else if ((rbp->srbp == 0

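The queue setup hunk above encodes the remote endpoint into a GDMA
"user" word, following the bit layout documented in the patch comment.
Below is a standalone sketch of that composition; the constants are
copied from the patch, while the helper name and its existence are
purely illustrative.

#include <stdint.h>

#define ZXDH_GDMA_PF_NUM_SHIFT  1
#define ZXDH_GDMA_VF_NUM_SHIFT  4
#define ZXDH_GDMA_EP_ID_SHIFT   12
#define ZXDH_GDMA_VF_EN         1
#define ZXDH_GDMA_EPID_OFFSET   5

/* Illustrative helper, not part of the patch. */
static uint32_t
zxdh_user_compose(uint8_t ep_id, uint8_t pf_id, uint16_t vf_id)
{
	/* ep_id occupies bits [15:12]; host endpoints are offset by 5. */
	uint32_t user = ((uint32_t)pf_id << ZXDH_GDMA_PF_NUM_SHIFT) |
			(((uint32_t)ep_id + ZXDH_GDMA_EPID_OFFSET) << ZXDH_GDMA_EP_ID_SHIFT);

	/* vf_id 0 means "PF only"; VFs are numbered from 1 in the API. */
	if (vf_id != 0)
		user |= ZXDH_GDMA_VF_EN | (((uint32_t)vf_id - 1) << ZXDH_GDMA_VF_NUM_SHIFT);

	return user;
}
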
[v2 3/5] raw/zxdh: add support for standard rawdev operations

2024-08-12 Thread Yong Zhang
Add support for rawdev operations such as dev_start and dev_stop.

Signed-off-by: Yong Zhang 
---
 drivers/raw/zxdh/zxdh_rawdev.c | 136 -
 drivers/raw/zxdh/zxdh_rawdev.h |  10 +++
 2 files changed, 145 insertions(+), 1 deletion(-)

diff --git a/drivers/raw/zxdh/zxdh_rawdev.c b/drivers/raw/zxdh/zxdh_rawdev.c
index a76e3cda39..363011dfcc 100644
--- a/drivers/raw/zxdh/zxdh_rawdev.c
+++ b/drivers/raw/zxdh/zxdh_rawdev.c
@@ -111,6 +111,96 @@ zxdh_gdma_write_reg(struct rte_rawdev *dev, uint16_t queue_id, uint32_t offset,
*(uint32_t *)(gdmadev->base_addr + addr) = val;
 }
 
+static int
+zxdh_gdma_rawdev_info_get(struct rte_rawdev *dev,
+ __rte_unused rte_rawdev_obj_t dev_info,
+ __rte_unused size_t dev_info_size)
+{
+   if (dev == NULL)
+   return -EINVAL;
+
+   return 0;
+}
+
+static int
+zxdh_gdma_rawdev_configure(const struct rte_rawdev *dev,
+  rte_rawdev_obj_t config,
+  size_t config_size)
+{
+   struct zxdh_gdma_config *gdma_config = NULL;
+
+   if ((dev == NULL) ||
+   (config == NULL) ||
+   (config_size != sizeof(struct zxdh_gdma_config)))
+   return -EINVAL;
+
+   gdma_config = (struct zxdh_gdma_config *)config;
+   if (gdma_config->max_vqs > ZXDH_GDMA_TOTAL_CHAN_NUM) {
+   ZXDH_PMD_LOG(ERR, "gdma supports up to %d queues", ZXDH_GDMA_TOTAL_CHAN_NUM);
+   return -EINVAL;
+   }
+
+   return 0;
+}
+
+static int
+zxdh_gdma_rawdev_start(struct rte_rawdev *dev)
+{
+   struct zxdh_gdma_rawdev *gdmadev = NULL;
+
+   if (dev == NULL)
+   return -EINVAL;
+
+   gdmadev = zxdh_gdma_rawdev_get_priv(dev);
+   gdmadev->device_state = ZXDH_GDMA_DEV_RUNNING;
+
+   return 0;
+}
+
+static void
+zxdh_gdma_rawdev_stop(struct rte_rawdev *dev)
+{
+   struct zxdh_gdma_rawdev *gdmadev = NULL;
+
+   if (dev == NULL)
+   return;
+
+   gdmadev = zxdh_gdma_rawdev_get_priv(dev);
+   gdmadev->device_state = ZXDH_GDMA_DEV_STOPPED;
+}
+
+static int
+zxdh_gdma_rawdev_reset(struct rte_rawdev *dev)
+{
+   if (dev == NULL)
+   return -EINVAL;
+
+   return 0;
+}
+
+static int
+zxdh_gdma_rawdev_close(struct rte_rawdev *dev)
+{
+   struct zxdh_gdma_rawdev *gdmadev = NULL;
+   struct zxdh_gdma_queue *queue = NULL;
+   uint16_t queue_id = 0;
+
+   if (dev == NULL)
+   return -EINVAL;
+
+   for (queue_id = 0; queue_id < ZXDH_GDMA_TOTAL_CHAN_NUM; queue_id++) {
+   queue = zxdh_gdma_get_queue(dev, queue_id);
+   if ((queue == NULL) || (queue->enable == 0))
+   continue;
+
+   zxdh_gdma_queue_free(dev, queue_id);
+   }
+   gdmadev = zxdh_gdma_rawdev_get_priv(dev);
+   gdmadev->device_state = ZXDH_GDMA_DEV_STOPPED;
+
+   return 0;
+}
+
 static int
 zxdh_gdma_rawdev_queue_setup(struct rte_rawdev *dev,
 uint16_t queue_id,
@@ -192,8 +282,52 @@ zxdh_gdma_rawdev_queue_setup(struct rte_rawdev *dev,
return queue_id;
 }
 
+static int
+zxdh_gdma_rawdev_queue_release(struct rte_rawdev *dev, uint16_t queue_id)
+{
+   struct zxdh_gdma_queue *queue = NULL;
+
+   if (dev == NULL)
+   return -EINVAL;
+
+   queue = zxdh_gdma_get_queue(dev, queue_id);
+   if ((queue == NULL) || (queue->enable == 0))
+   return -EINVAL;
+
+   zxdh_gdma_queue_free(dev, queue_id);
+
+   return 0;
+}
+
+static int
+zxdh_gdma_rawdev_get_attr(struct rte_rawdev *dev,
+ __rte_unused const char *attr_name,
+ uint64_t *attr_value)
+{
+   struct zxdh_gdma_rawdev *gdmadev = NULL;
+   struct zxdh_gdma_attr *gdma_attr = NULL;
+
+   if ((dev == NULL) || (attr_value == NULL))
+   return -EINVAL;
+
+   gdmadev   = zxdh_gdma_rawdev_get_priv(dev);
+   gdma_attr = (struct zxdh_gdma_attr *)attr_value;
+   gdma_attr->num_hw_queues = gdmadev->used_num;
+
+   return 0;
+}
 static const struct rte_rawdev_ops zxdh_gdma_rawdev_ops = {
+   .dev_info_get = zxdh_gdma_rawdev_info_get,
+   .dev_configure = zxdh_gdma_rawdev_configure,
+   .dev_start = zxdh_gdma_rawdev_start,
+   .dev_stop = zxdh_gdma_rawdev_stop,
+   .dev_close = zxdh_gdma_rawdev_close,
+   .dev_reset = zxdh_gdma_rawdev_reset,
+
.queue_setup = zxdh_gdma_rawdev_queue_setup,
+   .queue_release = zxdh_gdma_rawdev_queue_release,
+
+   .attr_get = zxdh_gdma_rawdev_get_attr,
 };
 
 static int
@@ -256,7 +390,7 @@ zxdh_gdma_queue_init(struct rte_rawdev *dev, uint16_t queue_id)
ZXDH_PMD_LOG(INFO, "queue%u ring phy addr:0x%"PRIx64" virt addr:%p",
 

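The ops added above are reached through the public rawdev API. Here is
a hypothetical application-side sketch; it assumes the three-argument
rte_rawdev_configure() of recent DPDK releases and the
struct zxdh_gdma_config from the patch's header, and the helper itself
is invented for illustration.

#include <rte_rawdev.h>

#include "zxdh_rawdev.h" /* struct zxdh_gdma_config, from this patch set */

static int
zxdh_gdma_bringup(uint16_t dev_id)
{
	struct zxdh_gdma_config cfg = { .max_vqs = 2 };
	struct rte_rawdev_info info = { .dev_private = &cfg };

	/* Ends up in zxdh_gdma_rawdev_configure(). */
	if (rte_rawdev_configure(dev_id, &info, sizeof(cfg)) != 0)
		return -1;

	/* Ends up in zxdh_gdma_rawdev_start(). */
	return rte_rawdev_start(dev_id);
}
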
[v2 4/5] raw/zxdh: add support for enqueue operation

2024-08-12 Thread Yong Zhang
Add rawdev enqueue operation for zxdh devices.

Signed-off-by: Yong Zhang 
---
 drivers/raw/zxdh/zxdh_rawdev.c | 220 +
 drivers/raw/zxdh/zxdh_rawdev.h |  19 +++
 2 files changed, 239 insertions(+)

diff --git a/drivers/raw/zxdh/zxdh_rawdev.c b/drivers/raw/zxdh/zxdh_rawdev.c
index 363011dfcc..a878d42c03 100644
--- a/drivers/raw/zxdh/zxdh_rawdev.c
+++ b/drivers/raw/zxdh/zxdh_rawdev.c
@@ -51,10 +51,34 @@
 /* Register offset */
 #define ZXDH_GDMA_BASE_OFFSET   0x10
 #define ZXDH_GDMA_EXT_ADDR_OFFSET   0x218
+#define ZXDH_GDMA_SAR_LOW_OFFSET0x200
+#define ZXDH_GDMA_DAR_LOW_OFFSET0x204
+#define ZXDH_GDMA_SAR_HIGH_OFFSET   0x234
+#define ZXDH_GDMA_DAR_HIGH_OFFSET   0x238
+#define ZXDH_GDMA_XFERSIZE_OFFSET   0x208
 #define ZXDH_GDMA_CONTROL_OFFSET0x230
+#define ZXDH_GDMA_TC_STATUS_OFFSET  0x0
+#define ZXDH_GDMA_STATUS_CLEAN_OFFSET   0x80
+#define ZXDH_GDMA_LLI_L_OFFSET  0x21c
+#define ZXDH_GDMA_LLI_H_OFFSET  0x220
+#define ZXDH_GDMA_CHAN_CONTINUE_OFFSET  0x224
 #define ZXDH_GDMA_TC_CNT_OFFSET 0x23c
 #define ZXDH_GDMA_LLI_USER_OFFSET   0x228
 
+/* Control register */
+#define ZXDH_GDMA_CHAN_ENABLE   0x1
+#define ZXDH_GDMA_CHAN_DISABLE  0
+#define ZXDH_GDMA_SOFT_CHAN 0x2
+#define ZXDH_GDMA_TC_INTR_ENABLE0x10
+#define ZXDH_GDMA_ALL_INTR_ENABLE   0x30
+#define ZXDH_GDMA_SBS_SHIFT 6   /* src burst size */
+#define ZXDH_GDMA_SBL_SHIFT 9   /* src burst length */
+#define ZXDH_GDMA_DBS_SHIFT 13  /* dest burst size */
+#define ZXDH_GDMA_BURST_SIZE_MIN0x1 /* 1 byte */
+#define ZXDH_GDMA_BURST_SIZE_MEDIUM 0x4 /* 4 word */
+#define ZXDH_GDMA_BURST_SIZE_MAX0x6 /* 16 word */
+#define ZXDH_GDMA_DEFAULT_BURST_LEN 0xf /* 16 beats */
+#define ZXDH_GDMA_TC_CNT_ENABLE (1 << 27)
 #define ZXDH_GDMA_CHAN_FORCE_CLOSE  (1 << 31)
 
 /* TC count & Error interrupt status register */
@@ -66,9 +90,15 @@
 #define ZXDH_GDMA_TC_CNT_CLEAN  (1)
 
 #define ZXDH_GDMA_CHAN_SHIFT    0x80
+#define ZXDH_GDMA_LINK_END_NODE (1 << 30)
+#define ZXDH_GDMA_CHAN_CONTINUE (1)
+
 #define LOW32_MASK  0xffffffff
 #define LOW16_MASK  0xffff
 
+#define IDX_TO_ADDR(addr, idx, t) \
+   ((t)((uintptr_t)(addr) + (idx) * sizeof(struct zxdh_gdma_buff_desc)))
+
 static int zxdh_gdma_queue_init(struct rte_rawdev *dev, uint16_t queue_id);
 static int zxdh_gdma_queue_free(struct rte_rawdev *dev, uint16_t queue_id);
 
@@ -316,6 +346,194 @@ zxdh_gdma_rawdev_get_attr(struct rte_rawdev *dev,
 
return 0;
 }
+
+static inline void
+zxdh_gdma_control_cal(uint32_t *val, uint8_t tc_enable)
+{
+   *val = (ZXDH_GDMA_CHAN_ENABLE |
+   ZXDH_GDMA_SOFT_CHAN |
+   (ZXDH_GDMA_DEFAULT_BURST_LEN << ZXDH_GDMA_SBL_SHIFT) |
+   (ZXDH_GDMA_BURST_SIZE_MAX << ZXDH_GDMA_SBS_SHIFT) |
+   (ZXDH_GDMA_BURST_SIZE_MAX << ZXDH_GDMA_DBS_SHIFT));
+
+   if (tc_enable != 0)
+   *val |= ZXDH_GDMA_TC_CNT_ENABLE;
+}
+
+static inline uint32_t
+zxdh_gdma_user_get(struct zxdh_gdma_queue *queue, struct zxdh_gdma_job *job)
+{
+   uint32_t src_user = 0;
+   uint32_t dst_user = 0;
+
+   if ((job->flags & ZXDH_GDMA_JOB_DIR_MASK) == 0) {
+   ZXDH_PMD_LOG(DEBUG, "job flags:0x%x default user:0x%x",
+   job->flags, queue->user);
+   return queue->user;
+   } else if ((job->flags & ZXDH_GDMA_JOB_DIR_TX) != 0) {
+   src_user = ZXDH_GDMA_ZF_USER;
+   dst_user = ((job->pf_id << ZXDH_GDMA_PF_NUM_SHIFT) |
+   ((job->ep_id + ZXDH_GDMA_EPID_OFFSET) << ZXDH_GDMA_EP_ID_SHIFT));
+
+   if (job->vf_id != 0)
+   dst_user |= (ZXDH_GDMA_VF_EN |
+((job->vf_id - 1) << ZXDH_GDMA_VF_NUM_SHIFT));
+   } else {
+   dst_user = ZXDH_GDMA_ZF_USER;
+   src_user = ((job->pf_id << ZXDH_GDMA_PF_NUM_SHIFT) |
+   ((job->ep_id + ZXDH_GDMA_EPID_OFFSET) << ZXDH_GDMA_EP_ID_SHIFT));
+
+   if (job->vf_id != 0)
+   src_user |= (ZXDH_GDMA_VF_EN |
+((job->vf_id - 1) << ZXDH_GDMA_VF_NUM_SHIFT));
+   }
+   ZXDH_PMD_LOG(DEBUG, "job flags:0x%x ep_id:%u, pf_id:%u, vf_id:%u, user:0x%x",
+   job->flags, job->ep_id, job->pf_id, job->vf_id,
+

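zxdh_gdma_user_get() above derives the source and destination user
words from per-job fields. The sketch below shows how a caller might
fill a host-to-host TX job; the struct and flag names follow the patch,
while the helper and the call pattern are assumptions.

#include <string.h>

#include "zxdh_rawdev.h" /* struct zxdh_gdma_job and the job flags */

/* Illustrative helper, not part of the patch. */
static void
zxdh_fill_tx_job(struct zxdh_gdma_job *job, uint8_t ep, uint8_t pf, uint16_t vf)
{
	memset(job, 0, sizeof(*job));
	job->flags = ZXDH_GDMA_JOB_DIR_TX; /* remote host is the DMA destination */
	job->ep_id = ep;                   /* host endpoint; hardware adds the offset of 5 */
	job->pf_id = pf;
	job->vf_id = vf;                   /* 0 = PF only, VFs are numbered from 1 */
}
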
[v2 5/5] raw/zxdh: add support for dequeue operation

2024-08-12 Thread Yong Zhang
Add rawdev dequeue operation for zxdh devices.

Signed-off-by: Yong Zhang 
---
 drivers/raw/zxdh/zxdh_rawdev.c | 113 +
 1 file changed, 113 insertions(+)

diff --git a/drivers/raw/zxdh/zxdh_rawdev.c b/drivers/raw/zxdh/zxdh_rawdev.c
index a878d42c03..ccb1a241c4 100644
--- a/drivers/raw/zxdh/zxdh_rawdev.c
+++ b/drivers/raw/zxdh/zxdh_rawdev.c
@@ -96,6 +96,8 @@
 #define LOW32_MASK  0xffffffff
 #define LOW16_MASK  0xffff
 
+#define ZXDH_GDMA_TC_CNT_MAX    0x10000
+
 #define IDX_TO_ADDR(addr, idx, t) \
((t)((uintptr_t)(addr) + (idx) * sizeof(struct zxdh_gdma_buff_desc)))
 
@@ -534,6 +536,116 @@ zxdh_gdma_rawdev_enqueue_bufs(struct rte_rawdev *dev,
 
return count;
 }
+
+static inline void
+zxdh_gdma_used_idx_update(struct zxdh_gdma_queue *queue, uint16_t cnt, uint8_t data_bd_err)
+{
+   uint16_t idx = 0;
+
+   if (queue->sw_ring.used_idx + cnt < queue->queue_size)
+   queue->sw_ring.used_idx += cnt;
+   else
+   queue->sw_ring.used_idx = queue->sw_ring.used_idx + cnt - queue->queue_size;
+
+   if (data_bd_err == 1) {
+   /* Update job status, the last job status is error */
+   if (queue->sw_ring.used_idx == 0)
+   idx = queue->queue_size - 1;
+   else
+   idx = queue->sw_ring.used_idx - 1;
+
+   queue->sw_ring.job[idx]->status = 1;
+   }
+}
+
+static int
+zxdh_gdma_rawdev_dequeue_bufs(struct rte_rawdev *dev,
+   __rte_unused struct rte_rawdev_buf **buffers,
+   uint32_t count,
+   rte_rawdev_obj_t context)
+{
+   struct zxdh_gdma_queue *queue = NULL;
+   struct zxdh_gdma_enqdeq *e_context = NULL;
+   uint16_t queue_id = 0;
+   uint32_t val = 0;
+   uint16_t tc_cnt = 0;
+   uint16_t diff_cnt = 0;
+   uint16_t i = 0;
+   uint16_t bd_idx = 0;
+   uint64_t next_bd_addr = 0;
+   uint8_t data_bd_err = 0;
+
+   if ((dev == NULL) || (context == NULL))
+   return -EINVAL;
+
+   e_context = (struct zxdh_gdma_enqdeq *)context;
+   queue_id = e_context->vq_id;
+   queue = zxdh_gdma_get_queue(dev, queue_id);
+   if ((queue == NULL) || (queue->enable == 0))
+   return -EINVAL;
+
+   if (queue->sw_ring.pend_cnt == 0)
+   goto deq_job;
+
+   /* Get data transmit count */
+   val = zxdh_gdma_read_reg(dev, queue_id, ZXDH_GDMA_TC_CNT_OFFSET);
+   tc_cnt = val & LOW16_MASK;
+   if (tc_cnt >= queue->tc_cnt)
+   diff_cnt = tc_cnt - queue->tc_cnt;
+   else
+   diff_cnt = tc_cnt + ZXDH_GDMA_TC_CNT_MAX - queue->tc_cnt;
+
+   queue->tc_cnt = tc_cnt;
+
+   /* Data transmit error, channel stopped */
+   if ((val & ZXDH_GDMA_ERR_STATUS) != 0) {
+   next_bd_addr  = zxdh_gdma_read_reg(dev, queue_id, ZXDH_GDMA_LLI_L_OFFSET);
+   next_bd_addr |= ((uint64_t)zxdh_gdma_read_reg(dev, queue_id,
+   ZXDH_GDMA_LLI_H_OFFSET) << 32);
+   next_bd_addr  = next_bd_addr << 6;
+   bd_idx = (next_bd_addr - queue->ring.ring_mem) / sizeof(struct zxdh_gdma_buff_desc);
+   if ((val & ZXDH_GDMA_SRC_DATA_ERR) || (val & ZXDH_GDMA_DST_ADDR_ERR)) {
+   diff_cnt++;
+   data_bd_err = 1;
+   }
+   ZXDH_PMD_LOG(INFO, "queue%d is err(0x%x) next_bd_idx:%u 
ll_addr:0x%"PRIx64" def user:0x%x",
+   queue_id, val, bd_idx, next_bd_addr, queue->user);
+
+   ZXDH_PMD_LOG(INFO, "Clean up error status");
+   val = ZXDH_GDMA_ERR_STATUS | ZXDH_GDMA_ERR_INTR_ENABLE;
+   zxdh_gdma_write_reg(dev, queue_id, ZXDH_GDMA_TC_CNT_OFFSET, val);
+
+   ZXDH_PMD_LOG(INFO, "Restart channel");
+   zxdh_gdma_write_reg(dev, queue_id, ZXDH_GDMA_XFERSIZE_OFFSET, 0);
+   zxdh_gdma_control_cal(&val, 0);
+   zxdh_gdma_write_reg(dev, queue_id, ZXDH_GDMA_CONTROL_OFFSET, val);
+   }
+
+   if (diff_cnt != 0) {
+   zxdh_gdma_used_idx_update(queue, diff_cnt, data_bd_err);
+   queue->sw_ring.deq_cnt += diff_cnt;
+   queue->sw_ring.pend_cnt -= diff_cnt;
+   }
+
+deq_job:
+   if (queue->sw_ring.deq_cnt == 0)
+   return 0;
+   else if (queue->sw_ring.deq_cnt < count)
+   count = queue->sw_ring.deq_cnt;
+
+   queue->sw_ring.deq_cnt -= count;
+
+   for (i = 0; i < count; i++) {
+   e_context->job[i] = queue->sw_ring.job[queue->sw_ring.deq_idx];
+   queue->sw_ring.job[queue->sw_ring.deq_idx] = NULL;
+   if (++queue->sw_ring.deq_idx >= queue->queue_size)
+ 

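The dequeue path above derives the number of newly completed jobs from
a hardware transfer-complete counter that wraps at 16 bits. Here is a
standalone sketch of just that wrap-around arithmetic, assuming the
0x10000 modulus implied by the 16-bit masking:

#include <stdint.h>

#define TC_CNT_MAX 0x10000 /* a 16-bit counter wraps at 65536 */

static uint16_t
tc_delta(uint16_t hw_cnt, uint16_t last_cnt)
{
	if (hw_cnt >= last_cnt)
		return hw_cnt - last_cnt;
	/* The counter wrapped since the last poll. */
	return (uint16_t)(hw_cnt + TC_CNT_MAX - last_cnt);
}
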
Inquiry Regarding Sending Patches to DPDK

2024-08-12 Thread 王颢
Dear all,

I hope this message finds you well.

I would like to seek your advice on an issue I've encountered. Our company has 
recently enabled two-factor authentication (2FA) for our email accounts. The IT 
department has suggested that I abandon using the "git send-email" method, as 
configured through git config, to send patches to DPDK. Instead, they have 
recommended using "Exchange anonymous send mail." However, I believe this 
approach might not be feasible.

I wanted to confirm this with you and see if you could provide any guidance on 
the matter. I look forward to your response.

Thank you very much for your time and assistance.

Best regards,
Howard Wang


Re: [PATCH v16 2/5] dts: replace the or operator in third party types

2024-08-12 Thread Juraj Linkeš




On 9. 8. 2024 21:03, Jeremy Spewock wrote:

This is a funny change I wouldn't have expected the series to need. I
don't doubt that it does need it of course and I don't think there is
any harm in the change, but, out of curiosity, is this because the or
operator is coming from one of the dependencies we are installing? I
thought it was just shipped with later Python versions. Regardless,


I think this happens when Paramiko is not installed, as is likely the 
case in CI. When Paramiko is installed, the issue doesn't crop up.


I added the autodoc_mock_imports [0] configuration so that dependencies 
don't have to be installed. I don't know how exactly autodoc gets around 
the imports, but that seems to be what's causing the error when Paramiko 
is missing.


[0] 
https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html#confval-autodoc_mock_imports




Reviewed-by: Jeremy Spewock 


Re: [PATCH v16 5/5] dts: add API doc generation

2024-08-12 Thread Juraj Linkeš







--- /dev/null
+++ b/doc/api/dts/meson.build
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2023 PANTHEON.tech s.r.o.


Should this be 2023 or updated now that we're in 2024? Probably
doesn't matter too much either way.



I've talked to Thomas about copyrights and in essence, the copyright is 
going to be valid for a long time, so basically there's no need.




diff --git a/doc/api/meson.build b/doc/api/meson.build
index 5b50692df9..788129336b 100644
--- a/doc/api/meson.build
+++ b/doc/api/meson.build
@@ -1,6 +1,18 @@
  # SPDX-License-Identifier: BSD-3-Clause
  # Copyright(c) 2018 Luca Boccassi 



Should you add your copyright to the top of this file now that you've
also modified it?



Possibly, but I actually want to move almost all (or maybe all, if it
works) of this one level deeper, so that would make too small of a
change to warrant the inclusion of your copyright, I think.



+dts_doc_targets = []
+dts_doc_target_names = []
+subdir('dts')
+
+if dts_doc_targets.length() == 0
+dts_message = 'No DTS docs targets found'
+else
+dts_message = 'Building DTS docs:'
+endif
+run_target('dts-doc', command: [echo, dts_message, dts_doc_target_names],
+depends: dts_doc_targets)
+
  doxygen = find_program('doxygen', required: get_option('enable_docs'))

  if not doxygen.found()
@@ -40,6 +52,7 @@ cdata.set('WARN_AS_ERROR', 'NO')
  if get_option('werror')
  cdata.set('WARN_AS_ERROR', 'YES')
  endif
+cdata.set('DTS_API_MAIN_PAGE', join_paths('..', 'dts', 'html', 'index.html'))

  # configure HTML Doxygen run
  html_cdata = configuration_data()
diff --git a/doc/guides/conf.py b/doc/guides/conf.py
index 0f7ff5282d..d7f3030838 100644
--- a/doc/guides/conf.py






Re: [PATCH v7 6/6] baseband/fpga_5gnr_fec: cosmetic comment changes

2024-08-12 Thread Maxime Coquelin

Hi Hernan,

Could you please rebase and resend this patch that was missed when the
series got applied? I get some conflicts.

Thanks in advance,
Maxime

On 2/8/24 17:50, Hernan Vargas wrote:

Cosmetic changes for comments.
No functional impact.

Signed-off-by: Hernan Vargas 
Reviewed-by: Maxime Coquelin 
---
  .../baseband/fpga_5gnr_fec/fpga_5gnr_fec.h|  49 ++--
  .../fpga_5gnr_fec/rte_fpga_5gnr_fec.c | 258 +-
  .../fpga_5gnr_fec/rte_pmd_fpga_5gnr_fec.h |  16 +-
  3 files changed, 160 insertions(+), 163 deletions(-)

diff --git a/drivers/baseband/fpga_5gnr_fec/fpga_5gnr_fec.h b/drivers/baseband/fpga_5gnr_fec/fpga_5gnr_fec.h
index 224684902569..6e97a3e9e2d4 100644
--- a/drivers/baseband/fpga_5gnr_fec/fpga_5gnr_fec.h
+++ b/drivers/baseband/fpga_5gnr_fec/fpga_5gnr_fec.h
@@ -11,7 +11,7 @@
  #include "agx100_pmd.h"
  #include "vc_5gnr_pmd.h"
  
-/* Helper macro for logging */

+/* Helper macro for logging. */
  #define rte_bbdev_log(level, fmt, ...) \
rte_log(RTE_LOG_ ## level, fpga_5gnr_fec_logtype, fmt "\n", \
##__VA_ARGS__)
@@ -24,7 +24,7 @@
  #define rte_bbdev_log_debug(fmt, ...)
  #endif
  
-/* FPGA 5GNR FEC driver names */

+/* FPGA 5GNR FEC driver names. */
  #define FPGA_5GNR_FEC_PF_DRIVER_NAME intel_fpga_5gnr_fec_pf
  #define FPGA_5GNR_FEC_VF_DRIVER_NAME intel_fpga_5gnr_fec_vf
  
@@ -43,15 +43,15 @@

  #define VC_5GNR_FPGA_VARIANT  0
  #define AGX100_FPGA_VARIANT   1
  
-/* Constants from K0 computation from 3GPP 38.212 Table 5.4.2.1-2 */

-#define N_ZC_1 66 /* N = 66 Zc for BG 1 */
-#define N_ZC_2 50 /* N = 50 Zc for BG 2 */
-#define K0_1_1 17 /* K0 fraction numerator for rv 1 and BG 1 */
-#define K0_1_2 13 /* K0 fraction numerator for rv 1 and BG 2 */
-#define K0_2_1 33 /* K0 fraction numerator for rv 2 and BG 1 */
-#define K0_2_2 25 /* K0 fraction numerator for rv 2 and BG 2 */
-#define K0_3_1 56 /* K0 fraction numerator for rv 3 and BG 1 */
-#define K0_3_2 43 /* K0 fraction numerator for rv 3 and BG 2 */
+/* Constants from K0 computation from 3GPP 38.212 Table 5.4.2.1-2. */
+#define N_ZC_1 66 /**< N = 66 Zc for BG 1. */
+#define N_ZC_2 50 /**< N = 50 Zc for BG 2. */
+#define K0_1_1 17 /**< K0 fraction numerator for rv 1 and BG 1. */
+#define K0_1_2 13 /**< K0 fraction numerator for rv 1 and BG 2. */
+#define K0_2_1 33 /**< K0 fraction numerator for rv 2 and BG 1. */
+#define K0_2_2 25 /**< K0 fraction numerator for rv 2 and BG 2. */
+#define K0_3_1 56 /**< K0 fraction numerator for rv 3 and BG 1. */
+#define K0_3_2 43 /**< K0 fraction numerator for rv 3 and BG 2. */
  
  /* FPGA 5GNR Ring Control Registers. */

  enum {
@@ -93,7 +93,7 @@ struct __rte_packed fpga_5gnr_ring_ctrl_reg {
uint64_t ring_head_addr;
uint16_t ring_size:11;
uint16_t rsrvd0;
-   union { /* Miscellaneous register */
+   union { /* Miscellaneous register. */
uint8_t misc;
uint8_t max_ul_dec:5,
max_ul_dec_en:1,
@@ -140,26 +140,23 @@ struct fpga_5gnr_fec_device {
  
  /** Structure associated with each queue. */

  struct __rte_cache_aligned fpga_5gnr_queue {
-   struct fpga_5gnr_ring_ctrl_reg ring_ctrl_reg;  /**< Ring Control Register */
+   struct fpga_5gnr_ring_ctrl_reg ring_ctrl_reg;  /**< Ring Control Register. */
union {
/** Virtual address of VC 5GNR software ring. */
union vc_5gnr_dma_desc *vc_5gnr_ring_addr;
/** Virtual address of AGX100 software ring. */
union agx100_dma_desc *agx100_ring_addr;
};
-   uint64_t *ring_head_addr;  /* Virtual address of completion_head */
-   uint64_t shadow_completion_head; /* Shadow completion head value */
-   uint16_t head_free_desc;  /* Ring head */
-   uint16_t tail;  /* Ring tail */
-   /* Mask used to wrap enqueued descriptors on the sw ring */
-   uint32_t sw_ring_wrap_mask;
-   uint32_t irq_enable;  /* Enable ops dequeue interrupts if set to 1 */
-   uint8_t q_idx;  /* Queue index */
-   /** uuid used for MUTEX acquision for DDR */
-   uint16_t ddr_mutex_uuid;
-   struct fpga_5gnr_fec_device *d;
-   /* MMIO register of shadow_tail used to enqueue descriptors */
-   void *shadow_tail_addr;
+   uint64_t *ring_head_addr;  /**< Virtual address of completion_head. */
+   uint64_t shadow_completion_head; /**< Shadow completion head value. */
+   uint16_t head_free_desc;  /**< Ring head. */
+   uint16_t tail;  /**< Ring tail. */
+   uint32_t sw_ring_wrap_mask; /**< Mask used to wrap enqueued descriptors on the sw ring. */
+   uint32_t irq_enable;  /**< Enable ops dequeue interrupts if set to 1. */
+   uint8_t q_idx;  /**< Queue index. */
+   uint16_t ddr_mutex_uuid; /**< uuid used for MUTEX acquisition for DDR. */
+   struct fpga_5gnr_fec_device *d; /**< FPGA 5GNR device structure. */
+   void *shadow_tail_addr; /**< MMIO register of shadow_tail used to enqueue descr

Re: [PATCH v1 1/3] bbdev: new queue stat for available enqueue depth

2024-08-12 Thread Maxime Coquelin

Hi Nicolas,

On 4/4/24 23:04, Nicolas Chautru wrote:

Capture an additional queue stats counter for the
enqueue batch depth still available on the given
queue. This can help the application monitor that depth
at run time.

Signed-off-by: Nicolas Chautru 
---
  lib/bbdev/rte_bbdev.h | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/lib/bbdev/rte_bbdev.h b/lib/bbdev/rte_bbdev.h
index 0cbfdd1c95..25514c58ac 100644
--- a/lib/bbdev/rte_bbdev.h
+++ b/lib/bbdev/rte_bbdev.h
@@ -283,6 +283,8 @@ struct rte_bbdev_stats {
 * bbdev operation
 */
uint64_t acc_offload_cycles;
+   /** Available number of enqueue batch on that queue. */
+   uint16_t enqueue_depth_avail;
  };
  
  /**


I think it needs to be documented in the ABI change section.

With that done, feel free to add:

Maxime Coquelin 

Thanks,
Maxime
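
For context, a hypothetical sketch of how an application could poll
the new counter once the series is applied. rte_bbdev_stats_get() is
the existing stats entry point; enqueue_depth_avail is the field added
by this patch; the helper itself is invented for illustration.

#include <rte_bbdev.h>

static uint16_t
avail_enqueue_room(uint16_t dev_id)
{
	struct rte_bbdev_stats stats;

	if (rte_bbdev_stats_get(dev_id, &stats) != 0)
		return 0;

	return stats.enqueue_depth_avail;
}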



Re: [PATCH v1 2/3] baseband/acc: refactor queue status update

2024-08-12 Thread Maxime Coquelin




On 4/4/24 23:04, Nicolas Chautru wrote:

Introducing common function for queue stats update
within the acc PMDs.

Signed-off-by: Nicolas Chautru 
---
  drivers/baseband/acc/acc_common.h | 18 
  drivers/baseband/acc/rte_acc100_pmd.c | 45 ++--
  drivers/baseband/acc/rte_vrb_pmd.c| 61 +--
  3 files changed, 50 insertions(+), 74 deletions(-)



Reviewed-by: Maxime Coquelin 

Thanks,
Maxime




Re: [PATCH v1 3/3] test/bbdev: update for queue stats

2024-08-12 Thread Maxime Coquelin




On 4/4/24 23:04, Nicolas Chautru wrote:

Update to include in test application the queue stats for the
enqueue_depth_avail counter.

Signed-off-by: Nicolas Chautru 
---
  app/test-bbdev/test_bbdev_perf.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c
index dcce00aa0a..7675c66edc 100644
--- a/app/test-bbdev/test_bbdev_perf.c
+++ b/app/test-bbdev/test_bbdev_perf.c
@@ -5702,6 +5702,7 @@ get_bbdev_queue_stats(uint16_t dev_id, uint16_t queue_id,
stats->enqueue_warn_count = q_stats->enqueue_warn_count;
stats->dequeue_warn_count = q_stats->dequeue_warn_count;
stats->acc_offload_cycles = q_stats->acc_offload_cycles;
+   stats->enqueue_depth_avail = q_stats->enqueue_depth_avail;
  
  	return 0;

  }


Reviewed-by: Maxime Coquelin 

Thanks,
Maxime



[DPDK/testpmd Bug 1517] Add L4 port details in the verbose output

2024-08-12 Thread bugzilla
https://bugs.dpdk.org/show_bug.cgi?id=1517

Bug ID: 1517
   Summary: Add L4 port details in the verbose output
   Product: DPDK
   Version: unspecified
  Hardware: All
OS: All
Status: UNCONFIRMED
  Severity: normal
  Priority: Normal
 Component: testpmd
  Assignee: dev@dpdk.org
  Reporter: luca.vizza...@arm.com
  Target Milestone: ---

As part of the DPDK Test Suite testing, there is a requirement to identify
packets through the verbose output. An easy and reliable way to achieve this is
to set an L4 port number and check it against the results. At the moment,
testpmd only reports a destination UDP port when a VXLAN packet is found. This
ticket proposes to display the source and/or destination TCP/UDP ports anytime
an L4 protocol is found in the packet.


Re: [PATCH v1 1/3] bbdev: new queue stat for available enqueue depth

2024-08-12 Thread Maxime Coquelin




On 8/12/24 11:28, Maxime Coquelin wrote:

Hi Nicolas,

On 4/4/24 23:04, Nicolas Chautru wrote:

Capture an additional queue stats counter for the
enqueue batch depth still available on the given
queue. This can help the application monitor that depth
at run time.

Signed-off-by: Nicolas Chautru 
---
  lib/bbdev/rte_bbdev.h | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/lib/bbdev/rte_bbdev.h b/lib/bbdev/rte_bbdev.h
index 0cbfdd1c95..25514c58ac 100644
--- a/lib/bbdev/rte_bbdev.h
+++ b/lib/bbdev/rte_bbdev.h
@@ -283,6 +283,8 @@ struct rte_bbdev_stats {
   * bbdev operation
   */
  uint64_t acc_offload_cycles;
+    /** Available number of enqueue batch on that queue. */
+    uint16_t enqueue_depth_avail;
  };
  /**


I think it needs to be documented in the ABI change section.

With that done, feel free to add:

Maxime Coquelin 

Reviewed-by: Maxime Coquelin 


Thanks,
Maxime




Re: [PATCH 1/2] baseband/fpga_5gnr_fec: use new barrier API

2024-08-12 Thread Maxime Coquelin

Hello Nicolas,

What do we do with this series?
Do we take it for v24.11?

Thanks,
Maxime

On 2/26/24 12:03, Maxime Coquelin wrote:

Hello,

On 2/22/24 19:05, Chautru, Nicolas wrote:

Hi Maxime,

Why would we change this here and now? Is the intent not to use new 
suggested semantics for new patches only?


The pull request was rejected because of the use of such barrier, which 
is reported by checkpatch.


### [PATCH] baseband/fpga_5gnr_fec: add AGX100 support
Warning in drivers/baseband/fpga_5gnr_fec/rte_fpga_5gnr_fec.c:
Using rte_smp_[r/w]mb


Are all DPDK drivers being changed?


My understanding is that for now, only new occurrences are prohibited.
Can you confirm, Tyler?

If so we could only change for now the patch adding ACX100.
But... I preferred doing the changes for all bbdev drivers for
consistency.

I am unsure we would want to change these drivers; this is the kind of
risk introduced by code churn that makes the ecosystem unwilling to move
to the latest version.


I think it is better to change now that we are far from the next LTS.

These memory barriers issues are awful to troubleshoot or properly 
validate, so personally quite reluctant to change.


If I disassemble fpga_dequeue_enc() with and without the patch, I cannot
spot a difference.

Thomas, are you waiting for this series to be applied to take the pull
request that was initially for -rc1?

Thanks,
Maxime


Thanks
Nic


-Original Message-
From: Maxime Coquelin 
Sent: Thursday, February 22, 2024 8:21 AM
To: dev@dpdk.org; Chautru, Nicolas ; Vargas,
Hernan ; Marchand, David
; tho...@monjalon.net;
roret...@linux.microsoft.com
Cc: Maxime Coquelin 
Subject: [PATCH 1/2] baseband/fpga_5gnr_fec: use new barrier API

rte_smp_rmb() is deprecated, use the new API instead as suggested in
rte_atomic header.

Signed-off-by: Maxime Coquelin 
---
  drivers/baseband/fpga_5gnr_fec/rte_fpga_5gnr_fec.c | 8 
  1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/baseband/fpga_5gnr_fec/rte_fpga_5gnr_fec.c b/drivers/baseband/fpga_5gnr_fec/rte_fpga_5gnr_fec.c
index efc1d3a772..314c87350e 100644
--- a/drivers/baseband/fpga_5gnr_fec/rte_fpga_5gnr_fec.c
+++ b/drivers/baseband/fpga_5gnr_fec/rte_fpga_5gnr_fec.c
@@ -2661,7 +2661,7 @@ vc_5gnr_dequeue_ldpc_enc_one_op_cb(struct fpga_5gnr_queue *q, struct rte_bbdev_e
  return -1;

  /* make sure the response is read atomically */
-    rte_smp_rmb();
+    rte_atomic_thread_fence(rte_memory_order_acquire);

  rte_bbdev_log_debug("DMA response desc %p", desc);

@@ -2690,7 +2690,7 @@ agx100_dequeue_ldpc_enc_one_op_cb(struct fpga_5gnr_queue *q, struct rte_bbdev_en
  return -1;

  /* make sure the response is read atomically. */
-    rte_smp_rmb();
+    rte_atomic_thread_fence(rte_memory_order_acquire);

  rte_bbdev_log_debug("DMA response desc %p", desc);

@@ -2722,7 +2722,7 @@ vc_5gnr_dequeue_ldpc_dec_one_op_cb(struct fpga_5gnr_queue *q, struct rte_bbdev_d
  return -1;

  /* make sure the response is read atomically */
-    rte_smp_rmb();
+    rte_atomic_thread_fence(rte_memory_order_acquire);

  #ifdef RTE_LIBRTE_BBDEV_DEBUG
  vc_5gnr_print_dma_dec_desc_debug_info(desc);
@@ -2768,7 +2768,7 @@ agx100_dequeue_ldpc_dec_one_op_cb(struct fpga_5gnr_queue *q, struct rte_bbdev_de
  return -1;

  /* make sure the response is read atomically. */
-    rte_smp_rmb();
+    rte_atomic_thread_fence(rte_memory_order_acquire);

  #ifdef RTE_LIBRTE_BBDEV_DEBUG
  agx100_print_dma_dec_desc_debug_info(desc);
--
2.43.0






Re: [PATCH v2 1/5] eal: extend bit manipulation functionality

2024-08-12 Thread Jack Bond-Preston

On 09/08/2024 10:58, Mattias Rönnblom wrote:


+
+__RTE_GEN_BIT_TEST(, test,, 32)
+__RTE_GEN_BIT_SET(, set,, 32)
+__RTE_GEN_BIT_CLEAR(, clear,, 32)
+__RTE_GEN_BIT_ASSIGN(, assign,, 32)
+__RTE_GEN_BIT_FLIP(, flip,, 32)
+
+__RTE_GEN_BIT_TEST(, test,, 64)
+__RTE_GEN_BIT_SET(, set,, 64)
+__RTE_GEN_BIT_CLEAR(, clear,, 64)
+__RTE_GEN_BIT_ASSIGN(, assign,, 64)
+__RTE_GEN_BIT_FLIP(, flip,, 64)


What is the purpose of the `fun` argument? As opposed to just having 
these written out in the macro definitions. I notice the atomic 
equivalents don't have this.



  /* 32-bit relaxed operations 
*/
  
  /**




Re: [PATCH v2 3/5] eal: add atomic bit operations

2024-08-12 Thread Jack Bond-Preston

On 09/08/2024 10:58, Mattias Rönnblom wrote:


+
+#define __RTE_GEN_BIT_ATOMIC_OPS(size) \
+   __RTE_GEN_BIT_ATOMIC_TEST(size) \
+   __RTE_GEN_BIT_ATOMIC_SET(size)  \
+   __RTE_GEN_BIT_ATOMIC_CLEAR(size)\
+   __RTE_GEN_BIT_ATOMIC_ASSIGN(size)   \
+   __RTE_GEN_BIT_ATOMIC_TEST_AND_SET(size) \
+   __RTE_GEN_BIT_ATOMIC_TEST_AND_CLEAR(size)   \
+   __RTE_GEN_BIT_ATOMIC_TEST_AND_ASSIGN(size)  \
+   __RTE_GEN_BIT_ATOMIC_FLIP(size)
+
+__RTE_GEN_BIT_ATOMIC_OPS(32)
+__RTE_GEN_BIT_ATOMIC_OPS(64)


For the non-atomic operations, the arguments family and qualifier were 
added in the initial commit, and unused until the 
volatile-support-adding commit. Perhaps the atomic equivalents should be 
the same? (ie. add the family and qualifier arguments in this patch and 
don't use them until patch 5/5.



+
  /* 32-bit relaxed operations 
*/
  
  /**




Re: [PATCH v2 5/5] eal: extend bitops to handle volatile pointers

2024-08-12 Thread Jack Bond-Preston

On 09/08/2024 10:58, Mattias Rönnblom wrote:


+#define __RTE_GEN_BIT_ATOMIC_TEST(v, qualifier, size)  \
__rte_experimental  \
static inline bool  \
-   __rte_bit_atomic_test ## size(const uint ## size ## _t *addr,   \
- unsigned int nr, int memory_order) \
+   __rte_bit_atomic_ ## v ## test ## size(const qualifier uint ## size ## 
_t *addr, \
+  unsigned int nr, int 
memory_order) \
{   \
RTE_ASSERT(nr < size);   \
\
-   const RTE_ATOMIC(uint ## size ## _t) *a_addr =  \
-   (const RTE_ATOMIC(uint ## size ## _t) *)addr;   \
+   const qualifier RTE_ATOMIC(uint ## size ## _t) *a_addr = \
+   (const qualifier RTE_ATOMIC(uint ## size ## _t) *)addr; 
\
uint ## size ## _t mask = (uint ## size ## _t)1 << nr;\
return rte_atomic_load_explicit(a_addr, memory_order) & mask; \
}
  
-#define __RTE_GEN_BIT_ATOMIC_SET(size)	\

+#define __RTE_GEN_BIT_ATOMIC_SET(v, qualifier, size)   \
__rte_experimental  \
static inline void  \
-   __rte_bit_atomic_set ## size(uint ## size ## _t *addr,  \
-unsigned int nr, int memory_order) \
+   __rte_bit_atomic_ ## v ## set ## size(qualifier uint ## size ## _t 
*addr, \
+ unsigned int nr, int 
memory_order) \
{   \
RTE_ASSERT(nr < size);   \
\
-   RTE_ATOMIC(uint ## size ## _t) *a_addr =\
-   (RTE_ATOMIC(uint ## size ## _t) *)addr; \
+   qualifier RTE_ATOMIC(uint ## size ## _t) *a_addr =  \
+   (qualifier RTE_ATOMIC(uint ## size ## _t) *)addr; \
uint ## size ## _t mask = (uint ## size ## _t)1 << nr;\
rte_atomic_fetch_or_explicit(a_addr, mask, memory_order); \
}
  
-#define __RTE_GEN_BIT_ATOMIC_CLEAR(size)\

+#define __RTE_GEN_BIT_ATOMIC_CLEAR(v, qualifier, size) \
__rte_experimental  \
static inline void  \
-   __rte_bit_atomic_clear ## size(uint ## size ## _t *addr,\
-  unsigned int nr, int memory_order) \
+   __rte_bit_atomic_ ## v ## clear ## size(qualifier uint ## size ## _t 
*addr, \
+   unsigned int nr, int 
memory_order) \
{   \
RTE_ASSERT(nr < size);   \
\
-   RTE_ATOMIC(uint ## size ## _t) *a_addr =\
-   (RTE_ATOMIC(uint ## size ## _t) *)addr; \
+   qualifier RTE_ATOMIC(uint ## size ## _t) *a_addr =  \
+   (qualifier RTE_ATOMIC(uint ## size ## _t) *)addr; \
uint ## size ## _t mask = (uint ## size ## _t)1 << nr;\
rte_atomic_fetch_and_explicit(a_addr, ~mask, memory_order); \
}
  
-#define __RTE_GEN_BIT_ATOMIC_FLIP(size)	\

+#define __RTE_GEN_BIT_ATOMIC_FLIP(v, qualifier, size)  \
__rte_experimental  \
static inline void  \
-   __rte_bit_atomic_flip ## size(uint ## size ## _t *addr, \
-  unsigned int nr, int memory_order) \
+   __rte_bit_atomic_ ## v ## flip ## size(qualifier uint ## size ## _t 
*addr, \
+  unsigned int nr, int 
memory_order) \
{   \
RTE_ASSERT(nr < size);   \
\
-   RTE_ATOMIC(uint ## size ## _t) *a_addr =\
-   (RTE_ATOMIC(uint ## size ## _t) *)addr; \
+   qualifier RTE_ATOMIC(uint ## size ## _t) *a_addr =  \
+   (qualifier RTE_ATOMIC(uint ## size ## _t) *)addr; \
uint

Re: [PATCH v2 1/5] eal: extend bit manipulation functionality

2024-08-12 Thread Mattias Rönnblom

On 2024-08-12 13:16, Jack Bond-Preston wrote:

On 09/08/2024 10:58, Mattias Rönnblom wrote:


+
+__RTE_GEN_BIT_TEST(, test,, 32)
+__RTE_GEN_BIT_SET(, set,, 32)
+__RTE_GEN_BIT_CLEAR(, clear,, 32)
+__RTE_GEN_BIT_ASSIGN(, assign,, 32)
+__RTE_GEN_BIT_FLIP(, flip,, 32)
+
+__RTE_GEN_BIT_TEST(, test,, 64)
+__RTE_GEN_BIT_SET(, set,, 64)
+__RTE_GEN_BIT_CLEAR(, clear,, 64)
+__RTE_GEN_BIT_ASSIGN(, assign,, 64)
+__RTE_GEN_BIT_FLIP(, flip,, 64)


What is the purpose of the `fun` argument? As opposed to just having 
these written out in the macro definitions. I notice the atomic 
equivalents don't have this.




It has no purpose, any more. I failed to clean that up, after removing 
the "once" family of functions.


Thanks.

  /* 32-bit relaxed operations 
*/

  /**



Re: [PATCH v2 3/5] eal: add atomic bit operations

2024-08-12 Thread Mattias Rönnblom

On 2024-08-12 13:19, Jack Bond-Preston wrote:

On 09/08/2024 10:58, Mattias Rönnblom wrote:


+
+#define __RTE_GEN_BIT_ATOMIC_OPS(size)    \
+    __RTE_GEN_BIT_ATOMIC_TEST(size)    \
+    __RTE_GEN_BIT_ATOMIC_SET(size)    \
+    __RTE_GEN_BIT_ATOMIC_CLEAR(size)    \
+    __RTE_GEN_BIT_ATOMIC_ASSIGN(size)    \
+    __RTE_GEN_BIT_ATOMIC_TEST_AND_SET(size)    \
+    __RTE_GEN_BIT_ATOMIC_TEST_AND_CLEAR(size)    \
+    __RTE_GEN_BIT_ATOMIC_TEST_AND_ASSIGN(size)    \
+    __RTE_GEN_BIT_ATOMIC_FLIP(size)
+
+__RTE_GEN_BIT_ATOMIC_OPS(32)
+__RTE_GEN_BIT_ATOMIC_OPS(64)


For the non-atomic operations, the arguments family and qualifier were 
added in the initial commit, and unused until the 
volatile-support-adding commit. Perhaps the atomic equivalents should be 
the same? (ie. add the family and qualifier arguments in this patch and 
don't use them until patch 5/5.




Sounds like a good idea.


+
  /* 32-bit relaxed operations 
*/

  /**



Re: [PATCH v2 5/5] eal: extend bitops to handle volatile pointers

2024-08-12 Thread Mattias Rönnblom

On 2024-08-12 13:22, Jack Bond-Preston wrote:

On 09/08/2024 10:58, Mattias Rönnblom wrote:


+#define __RTE_GEN_BIT_ATOMIC_TEST(v, qualifier, size)    \
  __rte_experimental    \
  static inline bool    \
-    __rte_bit_atomic_test ## size(const uint ## size ## _t *addr,    \
-  unsigned int nr, int memory_order) \
+    __rte_bit_atomic_ ## v ## test ## size(const qualifier uint ## 
size ## _t *addr, \

+   unsigned int nr, int memory_order) \
  {    \
  RTE_ASSERT(nr < size);    \
  \
-    const RTE_ATOMIC(uint ## size ## _t) *a_addr =    \
-    (const RTE_ATOMIC(uint ## size ## _t) *)addr;    \
+    const qualifier RTE_ATOMIC(uint ## size ## _t) *a_addr = \
+    (const qualifier RTE_ATOMIC(uint ## size ## _t) *)addr;    \
  uint ## size ## _t mask = (uint ## size ## _t)1 << nr;    \
  return rte_atomic_load_explicit(a_addr, memory_order) & mask; \
  }
-#define __RTE_GEN_BIT_ATOMIC_SET(size)    \
+#define __RTE_GEN_BIT_ATOMIC_SET(v, qualifier, size)    \
  __rte_experimental    \
  static inline void    \
-    __rte_bit_atomic_set ## size(uint ## size ## _t *addr,    \
- unsigned int nr, int memory_order)    \
+    __rte_bit_atomic_ ## v ## set ## size(qualifier uint ## size ## 
_t *addr, \

+  unsigned int nr, int memory_order) \
  {    \
  RTE_ASSERT(nr < size);    \
  \
-    RTE_ATOMIC(uint ## size ## _t) *a_addr =    \
-    (RTE_ATOMIC(uint ## size ## _t) *)addr;    \
+    qualifier RTE_ATOMIC(uint ## size ## _t) *a_addr =    \
+    (qualifier RTE_ATOMIC(uint ## size ## _t) *)addr; \
  uint ## size ## _t mask = (uint ## size ## _t)1 << nr;    \
  rte_atomic_fetch_or_explicit(a_addr, mask, memory_order); \
  }
-#define __RTE_GEN_BIT_ATOMIC_CLEAR(size)    \
+#define __RTE_GEN_BIT_ATOMIC_CLEAR(v, qualifier, size)    \
  __rte_experimental    \
  static inline void    \
-    __rte_bit_atomic_clear ## size(uint ## size ## _t *addr,    \
-   unsigned int nr, int memory_order) \
+    __rte_bit_atomic_ ## v ## clear ## size(qualifier uint ## size ## 
_t *addr,    \

+    unsigned int nr, int memory_order) \
  {    \
  RTE_ASSERT(nr < size);    \
  \
-    RTE_ATOMIC(uint ## size ## _t) *a_addr =    \
-    (RTE_ATOMIC(uint ## size ## _t) *)addr;    \
+    qualifier RTE_ATOMIC(uint ## size ## _t) *a_addr =    \
+    (qualifier RTE_ATOMIC(uint ## size ## _t) *)addr; \
  uint ## size ## _t mask = (uint ## size ## _t)1 << nr;    \
  rte_atomic_fetch_and_explicit(a_addr, ~mask, memory_order); \
  }
-#define __RTE_GEN_BIT_ATOMIC_FLIP(size)    \
+#define __RTE_GEN_BIT_ATOMIC_FLIP(v, qualifier, size)    \
  __rte_experimental    \
  static inline void    \
-    __rte_bit_atomic_flip ## size(uint ## size ## _t *addr,    \
-   unsigned int nr, int memory_order) \
+    __rte_bit_atomic_ ## v ## flip ## size(qualifier uint ## size ## 
_t *addr, \

+   unsigned int nr, int memory_order) \
  {    \
  RTE_ASSERT(nr < size);    \
  \
-    RTE_ATOMIC(uint ## size ## _t) *a_addr =    \
-    (RTE_ATOMIC(uint ## size ## _t) *)addr;    \
+    qualifier RTE_ATOMIC(uint ## size ## _t) *a_addr =    \
+    (qualifier RTE_ATOMIC(uint ## size ## _t) *)addr; \
  uint ## size ## _t mask = (uint ## size ## _t)1 << nr;    \
  rte_atomic_fetch_xor_explicit(a_addr, mask, memory_order); \
  }
-#define __RTE_GEN_BIT_ATOMIC_ASSIGN(size)    \
+#define __RTE_GEN_BIT_ATOMIC_ASSIGN(v, qualifier, size)    \
  __rte_experimental    \
  static inline void    \
-    __rte_bit_atomic_assign ## size(uint ## size ## _t *addr,    \
-    unsigned int nr, bool value,    \
-    int memory_order)    \
+    __rte_bit_atomic_## v ## assign ## size(qualifier uint ## size ## 
_t *addr, \

+    unsigned int nr, bool value, \
+    int memory_order)    \
  {    \
  if (value)    \
-    __rte_bit_atomic_set ## size(addr, nr, memory_order); \
+    __rte

[DPDK/ethdev Bug 1518] E810/ice: cannot create flow with simple_xor hash function

2024-08-12 Thread bugzilla
https://bugs.dpdk.org/show_bug.cgi?id=1518

Bug ID: 1518
   Summary: E810/ice: cannot create flow with simple_xor hash
function
   Product: DPDK
   Version: unspecified
  Hardware: All
OS: All
Status: UNCONFIRMED
  Severity: minor
  Priority: Normal
 Component: ethdev
  Assignee: dev@dpdk.org
  Reporter: alex.chap...@arm.com
  Target Milestone: ---

With dpdk revision: 24.07.0-rc4, I get the following error when attempting to
create a flow rule using the simple_xor algorithm.

[...]/dpdk-testpmd -l 10-19 -n 4 --file-prefix=dpdk_36732_20240812110924 -a :01:00.0 -a :03:00.0 -- -i --port-topology=paired --rxq=16 --txq=16 --mask-event=intr_lsc

EAL: Detected CPU lcores: 192
EAL: Detected NUMA nodes: 2
EAL: Detected static linkage of DPDK
EAL: Multi-process socket /var/run/dpdk/dpdk_36732_20240812110924/mp_socket
EAL: Selected IOVA mode 'VA'
EAL: VFIO support initialized
EAL: Using IOMMU type 1 (Type 1)
EAL: Probe PCI driver: net_ice (8086:1592) device: :01:00.0 (socket 0)
ice_load_pkg_type(): Active package is: 1.3.36.0, ICE OS Default Package
(double VLAN mode)
EAL: Probe PCI driver: net_ice (8086:1592) device: :03:00.0 (socket 0)
ice_load_pkg_type(): Active package is: 1.3.36.0, ICE OS Default Package
(double VLAN mode)
TELEMETRY: No legacy callbacks, legacy socket not created
Interactive-mode selected
...
testpmd> flow create 0 ingress pattern eth / ipv4 / udp / end actions rss types ipv4-udp end queues end func symmetric_toeplitz / end
Flow rule #0 created
testpmd> flow create 0 ingress pattern eth / ipv4 / udp / end actions rss types ipv4-udp end queues end func simple_xor / end
ice_flow_create(): Failed to create flow
port_flow_complain(): Caught PMD error type 10 (item specification): cause: 0xd1c69358, Invalid input set: Invalid argument

According to the datasheet [1] of the E810 NIC, simple_xor is supported.
I had a brief look into the ice driver, and it seems to have support for
simple_xor.
Both toeplitz and symmetric_toeplitz work correctly.

[1]:
https://www.intel.com/content/www/us/en/content-details/613875/intel-ethernet-controller-e810-datasheet.html


[PATCH v3 0/5] Improve EAL bit operations API

2024-08-12 Thread Mattias Rönnblom
This patch set represent an attempt to improve and extend the RTE
bitops API, in particular for functions that operate on individual
bits.

All new functionality is exposed to the user as generic selection
macros, delegating the actual work to private (__-marked) static
inline functions. Public functions (e.g., rte_bit_set32()) would just
be bloating the API. Such generic selection macros will here be
referred to as "functions", although technically they are not.

The legacy rte_bit_relaxed_*() functions are replaced
with two new families:

rte_bit_[test|set|clear|assign|flip]() which provides no memory
ordering or atomicity guarantees, but does provide the best
performance. The performance degradation resulting from the use of
volatile (e.g., forcing loads and stores to actually occur and in the
number specified) and atomic (e.g., LOCK-prefixed instructions on x86)
may be significant. rte_bit_[test|set|clear|assign|flip]() may be
used with volatile word pointers, in which case they guarantee
that the program-level accesses actually occur.

rte_bit_atomic_*() which provides atomic bit-level operations,
including the possibility of specifying memory ordering constraints
(or the lack thereof).

The atomic functions take non-_Atomic pointers, to be flexible, just
like the GCC builtins and the default atomics API. The issue with
_Atomic APIs is that it may well be the case that the user wants to
perform both non-atomic and atomic operations on the same word.

Having _Atomic-marked addresses would complicate supporting atomic
bit-level operations in the bitset API (proposed in a different RFC
patchset), and potentially other APIs depending on RTE bitops for
atomic bit-level ops). Either one needs two bitset variants, one
_Atomic bitset and one non-atomic one, or the bitset code needs to
cast the non-_Atomic pointer to an _Atomic one. Having a separate
_Atomic bitset would be bloat and also prevent the user from both, in
some situations, doing atomic operations against a bit set, while in
other situations (e.g., at times when MT safety is not a concern)
operating on the same objects in a non-atomic manner.

Unlike rte_bit_relaxed_*(), individual bits are represented by bool,
not uint32_t or uint64_t. The author found the use of such large types
confusing, and also failed to see any performance benefits.

A set of functions rte_bit_*_assign() are added, to assign a
particular boolean value to a particular bit.

All new functions have properly documented semantics.

All new functions operate on both 32 and 64-bit words, with type
checking.

_Generic allows the user code to be a little more compact. Having a
type-generic atomic test/set/clear/assign bit API also seems
consistent with the "core" (word-size) atomics API, which is generic
(both the GCC builtins and the standard atomics are).

The _Generic versions avoids having explicit unsigned long versions of
all functions. If you have an unsigned long, it's safe to use the
generic version (e.g., rte_bit_set()) and _Generic will pick the right
function, provided long is either 32 or 64 bit on your platform (which
it is on all DPDK-supported ABIs).

The generic rte_bit_set() is a macro, and not a function, but
nevertheless has been given a lower-case name. That's how C11 does it
(for atomics, and other _Generic-based macros). Its address
can't be taken, but it does not evaluate its parameters more than
once.
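
A brief usage sketch of the proposed (experimental) API. The names are
the ones introduced by this series; _Generic dispatches on the word
type, so the same macros serve 32-bit and 64-bit words:

#include <stdint.h>

#include <rte_bitops.h>

static uint64_t
build_mask(void)
{
	uint64_t word = 0;
	uint32_t small = 0;

	rte_bit_set(&word, 42);   /* resolves to the 64-bit variant */
	rte_bit_set(&small, 7);   /* resolves to the 32-bit variant */
	rte_bit_assign(&word, 3, rte_bit_test(&small, 7));
	rte_bit_flip(&word, 0);

	return word;
}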

C++ doesn't support generic selection. In C++ translation units the
_Generic macros are replaced with overloaded functions, implemented by
means of a huge, complicated C macro mess.

Mattias Rönnblom (5):
  eal: extend bit manipulation functionality
  eal: add unit tests for bit operations
  eal: add atomic bit operations
  eal: add unit tests for atomic bit access functions
  eal: extend bitops to handle volatile pointers

 app/test/test_bitops.c | 416 +-
 doc/guides/rel_notes/release_24_11.rst |  17 +
 lib/eal/include/rte_bitops.h   | 768 -
 3 files changed, 1183 insertions(+), 18 deletions(-)

-- 
2.34.1



[PATCH v3 2/5] eal: add unit tests for bit operations

2024-08-12 Thread Mattias Rönnblom
Extend bitops tests to cover the
rte_bit_[test|set|clear|assign|flip]()
functions.

The tests are converted to use the test suite runner framework.

Signed-off-by: Mattias Rönnblom 
Acked-by: Morten Brørup 
Acked-by: Tyler Retzlaff 

--

RFC v6:
 * Test rte_bit_*test() usage through const pointers.

RFC v4:
 * Remove redundant line continuations.
---
 app/test/test_bitops.c | 85 ++
 1 file changed, 70 insertions(+), 15 deletions(-)

diff --git a/app/test/test_bitops.c b/app/test/test_bitops.c
index 0d4ccfb468..322f58c066 100644
--- a/app/test/test_bitops.c
+++ b/app/test/test_bitops.c
@@ -1,13 +1,68 @@
 /* SPDX-License-Identifier: BSD-3-Clause
  * Copyright(c) 2019 Arm Limited
+ * Copyright(c) 2024 Ericsson AB
  */
 
+#include 
+
 #include 
 #include 
+#include 
 #include "test.h"
 
-uint32_t val32;
-uint64_t val64;
+#define GEN_TEST_BIT_ACCESS(test_name, set_fun, clear_fun, assign_fun, \
+   flip_fun, test_fun, size)   \
+   static int  \
+   test_name(void) \
+   {   \
+   uint ## size ## _t reference = (uint ## size ## _t)rte_rand(); \
+   unsigned int bit_nr;\
+   uint ## size ## _t word = (uint ## size ## _t)rte_rand(); \
+   \
+   for (bit_nr = 0; bit_nr < size; bit_nr++) { \
+   bool reference_bit = (reference >> bit_nr) & 1; \
+   bool assign = rte_rand() & 1;   \
+   if (assign) \
+   assign_fun(&word, bit_nr, reference_bit); \
+   else {  \
+   if (reference_bit)  \
+   set_fun(&word, bit_nr); \
+   else\
+   clear_fun(&word, bit_nr);   \
+   \
+   }   \
+   TEST_ASSERT(test_fun(&word, bit_nr) == reference_bit, \
+   "Bit %d had unexpected value", bit_nr); \
+   flip_fun(&word, bit_nr);\
+   TEST_ASSERT(test_fun(&word, bit_nr) != reference_bit, \
+   "Bit %d had unflipped value", bit_nr); \
+   flip_fun(&word, bit_nr);\
+   \
+   const uint ## size ## _t *const_ptr = &word;\
+   TEST_ASSERT(test_fun(const_ptr, bit_nr) ==  \
+   reference_bit,  \
+   "Bit %d had unexpected value", bit_nr); \
+   }   \
+   \
+   for (bit_nr = 0; bit_nr < size; bit_nr++) { \
+   bool reference_bit = (reference >> bit_nr) & 1; \
+   TEST_ASSERT(test_fun(&word, bit_nr) == reference_bit, \
+   "Bit %d had unexpected value", bit_nr); \
+   }   \
+   \
+   TEST_ASSERT(reference == word, "Word had unexpected value"); \
+   \
+   return TEST_SUCCESS;\
+   }
+
+GEN_TEST_BIT_ACCESS(test_bit_access32, rte_bit_set, rte_bit_clear,
+   rte_bit_assign, rte_bit_flip, rte_bit_test, 32)
+
+GEN_TEST_BIT_ACCESS(test_bit_access64, rte_bit_set, rte_bit_clear,
+   rte_bit_assign, rte_bit_flip, rte_bit_test, 64)
+
+static uint32_t val32;
+static uint64_t val64;
 
 #define MAX_BITS_32 32
 #define MAX_BITS_64 64
@@ -117,22 +172,22 @@ test_bit_relaxed_test_set_clear(void)
return TEST_SUCCESS;
 }
 
+static struct unit_test_suite test_suite = {
+   .suite_name = "Bitops test suite",
+   .unit_test_cases = {
+   TEST_CASE(test_bit_access32),
+   TEST_CASE(test_bit_access64),
+   TEST_CASE(test_bit_relaxed_set),
+   TEST_CASE(test_bit_relaxed_clear),
+   TEST_CASE(test_bit_relaxed_test_set_clear),
+   TEST_CASES_END()
+   }
+};
+
 

[PATCH v3 1/5] eal: extend bit manipulation functionality

2024-08-12 Thread Mattias Rönnblom
Add functionality to test and modify the value of individual bits in
32-bit or 64-bit words.

These functions have no implications on memory ordering or atomicity,
do not use volatile, and thus do not prevent any compiler
optimizations.

Signed-off-by: Mattias Rönnblom 
Acked-by: Morten Brørup 
Acked-by: Tyler Retzlaff 

--

PATCH v3:
 * Remove unnecessary  include.
 * Remove redundant 'fun' parameter from the __RTE_GEN_BIT_*() macros
   (Jack Bond-Preston).
 * Introduce __RTE_BIT_BIT_OPS() macro, consistent with how things
   are done when generating the atomic bit operations.
 * Refer to volatile bit op functions as variants instead of families
   (macro parameter naming).

RFC v6:
 * Have rte_bit_test() accept const-marked bitsets.

RFC v4:
 * Add rte_bit_flip() which, believe it or not, flips the value of a bit.
 * Mark macro-generated private functions as experimental.
 * Use macros to generate *assign*() functions.

RFC v3:
 * Work around lack of C++ support for _Generic (Tyler Retzlaff).
 * Fix ','-related checkpatch warnings.
---
 lib/eal/include/rte_bitops.h | 260 ++-
 1 file changed, 258 insertions(+), 2 deletions(-)

diff --git a/lib/eal/include/rte_bitops.h b/lib/eal/include/rte_bitops.h
index 449565eeae..6915b945ba 100644
--- a/lib/eal/include/rte_bitops.h
+++ b/lib/eal/include/rte_bitops.h
@@ -2,6 +2,7 @@
  * Copyright(c) 2020 Arm Limited
  * Copyright(c) 2010-2019 Intel Corporation
  * Copyright(c) 2023 Microsoft Corporation
+ * Copyright(c) 2024 Ericsson AB
  */
 
 #ifndef _RTE_BITOPS_H_
@@ -11,12 +12,14 @@
  * @file
  * Bit Operations
  *
- * This file defines a family of APIs for bit operations
- * without enforcing memory ordering.
+ * This file provides functionality for low-level, single-word
+ * arithmetic and bit-level operations, such as counting or
+ * setting individual bits.
  */
 
 #include 
 
+#include 
 #include 
 
 #ifdef __cplusplus
@@ -105,6 +108,197 @@ extern "C" {
 #define RTE_FIELD_GET64(mask, reg) \
((typeof(mask))(((reg) & (mask)) >> rte_ctz64(mask)))
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Test bit in word.
+ *
+ * Generic selection macro to test the value of a bit in a 32-bit or
+ * 64-bit word. The type of operation depends on the type of the @c
+ * addr parameter.
+ *
+ * This macro does not give any guarantees in regards to memory
+ * ordering or atomicity.
+ *
+ * @param addr
+ *   A pointer to the word to query.
+ * @param nr
+ *   The index of the bit.
+ */
+#define rte_bit_test(addr, nr) \
+   _Generic((addr),\
+   uint32_t *: __rte_bit_test32,   \
+   const uint32_t *: __rte_bit_test32, \
+   uint64_t *: __rte_bit_test64,   \
+   const uint64_t *: __rte_bit_test64)(addr, nr)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Set bit in word.
+ *
+ * Generic selection macro to set a bit in a 32-bit or 64-bit
+ * word. The type of operation depends on the type of the @c addr
+ * parameter.
+ *
+ * This macro does not give any guarantees in regards to memory
+ * ordering or atomicity.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ */
+#define rte_bit_set(addr, nr)  \
+   _Generic((addr),\
+uint32_t *: __rte_bit_set32,   \
+uint64_t *: __rte_bit_set64)(addr, nr)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Clear bit in word.
+ *
+ * Generic selection macro to clear a bit in a 32-bit or 64-bit
+ * word. The type of operation depends on the type of the @c addr
+ * parameter.
+ *
+ * This macro does not give any guarantees in regards to memory
+ * ordering or atomicity.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ */
+#define rte_bit_clear(addr, nr)\
+   _Generic((addr),\
+uint32_t *: __rte_bit_clear32, \
+uint64_t *: __rte_bit_clear64)(addr, nr)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Assign a value to a bit in word.
+ *
+ * Generic selection macro to assign a value to a bit in a 32-bit or 64-bit
+ * word. The type of operation depends on the type of the @c addr parameter.
+ *
+ * This macro does not give any guarantees in regards to memory
+ * ordering or atomicity.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ * @param value
+ *   The new value of the bit - true for '1', or false for '0'.
+ */
+#define rte_bit_assign(addr, nr, value)  
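
As a usage sketch (hypothetical build_word() helper, assuming the
<rte_bitops.h> from this patch): the same macro names work on 32- and
64-bit words, and rte_bit_test() also accepts const-qualified pointers:

#include <stdbool.h>
#include <stdint.h>

#include <rte_bitops.h>

static uint64_t
build_word(void)
{
	uint64_t word = 0;

	rte_bit_set(&word, 3);          /* word == 0x08 */
	rte_bit_assign(&word, 7, true); /* word == 0x88 */
	rte_bit_flip(&word, 3);         /* word == 0x80 */

	/* Reading through a const pointer is allowed for the test macro. */
	const uint64_t *ro = &word;
	return rte_bit_test(ro, 7) ? word : 0;
}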

[PATCH v3 5/5] eal: extend bitops to handle volatile pointers

2024-08-12 Thread Mattias Rönnblom
Have rte_bit_[test|set|clear|assign|flip]() and rte_bit_atomic_*()
handle volatile-marked pointers.

Signed-off-by: Mattias Rönnblom 
Acked-by: Morten Brørup 

--

PATCH v3:
 * Updated to reflect removed 'fun' parameter in __RTE_GEN_BIT_*()
   (Jack Bond-Preston).

PATCH v2:
 * Actually run the test_bit_atomic_v_access*() test functions.
---
 app/test/test_bitops.c   |  32 +++-
 lib/eal/include/rte_bitops.h | 301 +++
 2 files changed, 222 insertions(+), 111 deletions(-)

diff --git a/app/test/test_bitops.c b/app/test/test_bitops.c
index b80216a0a1..10e87f6776 100644
--- a/app/test/test_bitops.c
+++ b/app/test/test_bitops.c
@@ -14,13 +14,13 @@
 #include "test.h"
 
 #define GEN_TEST_BIT_ACCESS(test_name, set_fun, clear_fun, assign_fun, \
-   flip_fun, test_fun, size)   \
+   flip_fun, test_fun, size, mod)  \
static int  \
test_name(void) \
{   \
uint ## size ## _t reference = (uint ## size ## _t)rte_rand(); \
unsigned int bit_nr;\
-   uint ## size ## _t word = (uint ## size ## _t)rte_rand(); \
+   mod uint ## size ## _t word = (uint ## size ## _t)rte_rand(); \
\
for (bit_nr = 0; bit_nr < size; bit_nr++) { \
bool reference_bit = (reference >> bit_nr) & 1; \
@@ -41,7 +41,7 @@
"Bit %d had unflipped value", bit_nr); \
flip_fun(&word, bit_nr);\
\
-   const uint ## size ## _t *const_ptr = &word;\
+   const mod uint ## size ## _t *const_ptr = &word; \
TEST_ASSERT(test_fun(const_ptr, bit_nr) ==  \
reference_bit,  \
"Bit %d had unexpected value", bit_nr); \
@@ -59,10 +59,16 @@
}
 
 GEN_TEST_BIT_ACCESS(test_bit_access32, rte_bit_set, rte_bit_clear,
-   rte_bit_assign, rte_bit_flip, rte_bit_test, 32)
+   rte_bit_assign, rte_bit_flip, rte_bit_test, 32,)
 
 GEN_TEST_BIT_ACCESS(test_bit_access64, rte_bit_set, rte_bit_clear,
-   rte_bit_assign, rte_bit_flip, rte_bit_test, 64)
+   rte_bit_assign, rte_bit_flip, rte_bit_test, 64,)
+
+GEN_TEST_BIT_ACCESS(test_bit_v_access32, rte_bit_set, rte_bit_clear,
+   rte_bit_assign, rte_bit_flip, rte_bit_test, 32, volatile)
+
+GEN_TEST_BIT_ACCESS(test_bit_v_access64, rte_bit_set, rte_bit_clear,
+   rte_bit_assign, rte_bit_flip, rte_bit_test, 64, volatile)
 
 #define bit_atomic_set(addr, nr)   \
rte_bit_atomic_set(addr, nr, rte_memory_order_relaxed)
@@ -81,11 +87,19 @@ GEN_TEST_BIT_ACCESS(test_bit_access64, rte_bit_set, rte_bit_clear,
 
 GEN_TEST_BIT_ACCESS(test_bit_atomic_access32, bit_atomic_set,
bit_atomic_clear, bit_atomic_assign,
-   bit_atomic_flip, bit_atomic_test, 32)
+   bit_atomic_flip, bit_atomic_test, 32,)
 
 GEN_TEST_BIT_ACCESS(test_bit_atomic_access64, bit_atomic_set,
bit_atomic_clear, bit_atomic_assign,
-   bit_atomic_flip, bit_atomic_test, 64)
+   bit_atomic_flip, bit_atomic_test, 64,)
+
+GEN_TEST_BIT_ACCESS(test_bit_atomic_v_access32, bit_atomic_set,
+   bit_atomic_clear, bit_atomic_assign,
+   bit_atomic_flip, bit_atomic_test, 32, volatile)
+
+GEN_TEST_BIT_ACCESS(test_bit_atomic_v_access64, bit_atomic_set,
+   bit_atomic_clear, bit_atomic_assign,
+   bit_atomic_flip, bit_atomic_test, 64, volatile)
 
 #define PARALLEL_TEST_RUNTIME 0.25
 
@@ -480,8 +494,12 @@ static struct unit_test_suite test_suite = {
TEST_CASE(test_bit_access64),
TEST_CASE(test_bit_access32),
TEST_CASE(test_bit_access64),
+   TEST_CASE(test_bit_v_access32),
+   TEST_CASE(test_bit_v_access64),
TEST_CASE(test_bit_atomic_access32),
TEST_CASE(test_bit_atomic_access64),
+   TEST_CASE(test_bit_atomic_v_access32),
+   TEST_CASE(test_bit_atomic_v_access64),
TEST_CASE(test_bit_atomic_parallel_assign32),
TEST_CASE(test_bit_atomic_parallel_assign64),
TEST_CASE(test_bit_atomic_parallel_test_and_modify32),
diff --git a/lib/eal/include/rte_bitops.h b/lib/eal/include/rte_bitops.h
index 3ad6795fd1..d7a07c4099 100644
--- a/lib/e
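
A sketch of what the patch enables (hypothetical shared_flags word,
assuming this patch is applied on top of the rest of the series):

#include <stdint.h>

#include <rte_bitops.h>

static volatile uint32_t shared_flags;

static void
mark_ready(void)
{
	/* Without this patch, the _Generic selection has no association
	 * for 'volatile uint32_t *' and the line below fails to compile. */
	rte_bit_set(&shared_flags, 0);
}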

[PATCH v3 3/5] eal: add atomic bit operations

2024-08-12 Thread Mattias Rönnblom
Add atomic bit test/set/clear/assign/flip and
test-and-set/clear/assign/flip functions.

All atomic bit functions allow (and indeed, require) the caller to
specify a memory order.

Signed-off-by: Mattias Rönnblom 
Acked-by: Morten Brørup 
Acked-by: Tyler Retzlaff 

--

PATCH v3:
 * Introduce __RTE_GEN_BIT_ATOMIC_*() 'qualifier' argument already in
   this patch (Jack Bond-Preston).
 * Refer to volatile bit op functions as variants instead of families
   (macro parameter naming).
 * Update release notes.

PATCH:
 * Add missing macro #undef for C++ version of atomic bit flip.

RFC v7:
 * Replace compare-exchange-based rte_bitset_atomic_test_and_*() and
   flip() with implementations that use the previous value as returned
   by the atomic fetch function.
 * Reword documentation to match the non-atomic macro variants.
 * Remove pointer to <rte_stdatomic.h> for memory model documentation,
   since there is no documentation for that API.

RFC v6:
 * Have rte_bit_atomic_test() accept const-marked bitsets.

RFC v4:
 * Add atomic bit flip.
 * Mark macro-generated private functions experimental.

RFC v3:
 * Work around lack of C++ support for _Generic (Tyler Retzlaff).

RFC v2:
 o Add rte_bit_atomic_test_and_assign() (for consistency).
 o Fix bugs in rte_bit_atomic_test_and_[set|clear]().
 o Use <rte_stdatomic.h> to support MSVC.
---
 doc/guides/rel_notes/release_24_11.rst |  17 +
 lib/eal/include/rte_bitops.h   | 415 +
 2 files changed, 432 insertions(+)

diff --git a/doc/guides/rel_notes/release_24_11.rst b/doc/guides/rel_notes/release_24_11.rst
index 0ff70d9057..3111b1e4c0 100644
--- a/doc/guides/rel_notes/release_24_11.rst
+++ b/doc/guides/rel_notes/release_24_11.rst
@@ -56,6 +56,23 @@ New Features
  ===
 
 
+* **Extended bit operations API.**
+
+  The support for bit-level operations on single 32- and 64-bit words
+  in <rte_bitops.h> has been extended with two families of
+  semantically well-defined functions.
+
+  The rte_bit_[test|set|clear|assign|flip]() functions provide excellent
+  performance (by not restricting the compiler and CPU), but give
+  no guarantees in regards to memory ordering or atomicity.
+
+  rte_bit_atomic_*() provides atomic bit-level operations, including
+  the possibility of specifying memory ordering constraints.
+
+  The new public API elements are polymorphic, using _Generic-based
+  macros (in C) and function overloading (in C++ translation
+  units).
+
 Removed Items
 -
 
diff --git a/lib/eal/include/rte_bitops.h b/lib/eal/include/rte_bitops.h
index 6915b945ba..3ad6795fd1 100644
--- a/lib/eal/include/rte_bitops.h
+++ b/lib/eal/include/rte_bitops.h
@@ -21,6 +21,7 @@
 
 #include 
 #include 
+#include <rte_stdatomic.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -226,6 +227,204 @@ extern "C" {
 uint32_t *: __rte_bit_flip32,  \
 uint64_t *: __rte_bit_flip64)(addr, nr)
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Test if a particular bit in a word is set with a particular memory
+ * order.
+ *
+ * Test a bit with the resulting memory load ordered as per the
+ * specified memory order.
+ *
+ * @param addr
+ *   A pointer to the word to query.
+ * @param nr
+ *   The index of the bit.
+ * @param memory_order
+ *   The memory order to use.
+ * @return
+ *   Returns true if the bit is set, and false otherwise.
+ */
+#define rte_bit_atomic_test(addr, nr, memory_order)\
+   _Generic((addr),\
+uint32_t *: __rte_bit_atomic_test32,   \
+const uint32_t *: __rte_bit_atomic_test32, \
+uint64_t *: __rte_bit_atomic_test64,   \
+const uint64_t *: __rte_bit_atomic_test64)(addr, nr,   \
+   memory_order)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Atomically set bit in word.
+ *
+ * Generic selection macro to atomically set bit specified by @c nr in
+ * the word pointed to by @c addr to '1', with the memory ordering as
+ * specified by @c memory_order.
+ *
+ * @param addr
+ *   A pointer to the word to modify.
+ * @param nr
+ *   The index of the bit.
+ * @param memory_order
+ *   The memory order to use.
+ */
+#define rte_bit_atomic_set(addr, nr, memory_order) \
+   _Generic((addr),\
+uint32_t *: __rte_bit_atomic_set32,\
+uint64_t *: __rte_bit_atomic_set64)(addr, nr, memory_order)
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Atomically clear bit in word.
+ *
+ * Generic selection macro to atomically set bit specified by @c nr in
+ * the word pointed to by @c addr to '0', with the memory ordering as
+ * specified by @c memory_order.
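
A usage sketch (hypothetical publish/poll pair, assuming this patch's
API) of how the explicit memory order argument is meant to be used:

#include <stdbool.h>
#include <stdint.h>

#include <rte_bitops.h>

static uint64_t flags;

static void
publish(void)
{
	/* Release: writes made before this set become visible to any
	 * thread that observes the bit with acquire ordering. */
	rte_bit_atomic_set(&flags, 0, rte_memory_order_release);
}

static bool
poll_ready(void)
{
	return rte_bit_atomic_test(&flags, 0, rte_memory_order_acquire);
}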

[PATCH v3 4/5] eal: add unit tests for atomic bit access functions

2024-08-12 Thread Mattias Rönnblom
Extend bitops tests to cover the rte_bit_atomic_*() family of
functions.

Signed-off-by: Mattias Rönnblom 
Acked-by: Morten Brørup 
Acked-by: Tyler Retzlaff 

--

RFC v4:
 * Add atomicity test for atomic bit flip.

RFC v3:
 * Rename variable 'main' to make ICC happy.
---
 app/test/test_bitops.c | 313 -
 1 file changed, 312 insertions(+), 1 deletion(-)

diff --git a/app/test/test_bitops.c b/app/test/test_bitops.c
index 322f58c066..b80216a0a1 100644
--- a/app/test/test_bitops.c
+++ b/app/test/test_bitops.c
@@ -3,10 +3,13 @@
  * Copyright(c) 2024 Ericsson AB
  */
 
+#include 
 #include 
 
-#include 
 #include 
+#include 
+#include 
+#include 
 #include 
 #include "test.h"
 
@@ -61,6 +64,304 @@ GEN_TEST_BIT_ACCESS(test_bit_access32, rte_bit_set, rte_bit_clear,
 GEN_TEST_BIT_ACCESS(test_bit_access64, rte_bit_set, rte_bit_clear,
rte_bit_assign, rte_bit_flip, rte_bit_test, 64)
 
+#define bit_atomic_set(addr, nr)   \
+   rte_bit_atomic_set(addr, nr, rte_memory_order_relaxed)
+
+#define bit_atomic_clear(addr, nr) \
+   rte_bit_atomic_clear(addr, nr, rte_memory_order_relaxed)
+
+#define bit_atomic_assign(addr, nr, value) \
+   rte_bit_atomic_assign(addr, nr, value, rte_memory_order_relaxed)
+
+#define bit_atomic_flip(addr, nr)  \
+rte_bit_atomic_flip(addr, nr, rte_memory_order_relaxed)
+
+#define bit_atomic_test(addr, nr)  \
+   rte_bit_atomic_test(addr, nr, rte_memory_order_relaxed)
+
+GEN_TEST_BIT_ACCESS(test_bit_atomic_access32, bit_atomic_set,
+   bit_atomic_clear, bit_atomic_assign,
+   bit_atomic_flip, bit_atomic_test, 32)
+
+GEN_TEST_BIT_ACCESS(test_bit_atomic_access64, bit_atomic_set,
+   bit_atomic_clear, bit_atomic_assign,
+   bit_atomic_flip, bit_atomic_test, 64)
+
+#define PARALLEL_TEST_RUNTIME 0.25
+
+#define GEN_TEST_BIT_PARALLEL_ASSIGN(size) \
+   \
+   struct parallel_access_lcore ## size\
+   {   \
+   unsigned int bit;   \
+   uint ## size ##_t *word;\
+   bool failed;\
+   };  \
+   \
+   static int  \
+   run_parallel_assign ## size(void *arg)  \
+   {   \
+   struct parallel_access_lcore ## size *lcore = arg;  \
+   uint64_t deadline = rte_get_timer_cycles() +\
+   PARALLEL_TEST_RUNTIME * rte_get_timer_hz(); \
+   bool value = false; \
+   \
+   do {\
+   bool new_value = rte_rand() & 1;\
+   bool use_test_and_modify = rte_rand() & 1;  \
+   bool use_assign = rte_rand() & 1;   \
+   \
+   if (rte_bit_atomic_test(lcore->word, lcore->bit, \
+   rte_memory_order_relaxed) != value) { \
+   lcore->failed = true;   \
+   break;  \
+   }   \
+   \
+   if (use_test_and_modify) {  \
+   bool old_value; \
+   if (use_assign) \
+   old_value = rte_bit_atomic_test_and_assign( \
+   lcore->word, lcore->bit, new_value, \
+   rte_memory_order_relaxed); \
+   else {  \
+   old_value = new_value ? \
+   rte_bit_atomic_test_and_set( \
+   lcore->word, lcore->bit, \
+   rte_memory_order_relaxed) : \
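
For context on what the parallel tests above exercise: the
test-and-modify forms atomically return the bit's previous value while
updating it, which is what makes idioms like the following sketch
(hypothetical try_claim() helper, assuming this series' API) race-free:

#include <stdbool.h>
#include <stdint.h>

#include <rte_bitops.h>

static uint32_t once_flags;

static bool
try_claim(unsigned int nr)
{
	/* Returns true for exactly one caller per bit, however many
	 * threads race on it. */
	return !rte_bit_atomic_test_and_set(&once_flags, nr,
					    rte_memory_order_relaxed);
}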

Re: [PATCH v3 1/5] eal: extend bit manipulation functionality

2024-08-12 Thread Jack Bond-Preston

On 12/08/2024 13:49, Mattias Rönnblom wrote:

Add functionality to test and modify the value of individual bits in
32-bit or 64-bit words.

These functions have no implications on memory ordering or atomicity,
do not use volatile, and thus do not prevent any compiler
optimizations.

Signed-off-by: Mattias Rönnblom 
Acked-by: Morten Brørup 
Acked-by: Tyler Retzlaff 


Acked-by: Jack Bond-Preston 



Re: [PATCH v3 2/5] eal: add unit tests for bit operations

2024-08-12 Thread Jack Bond-Preston

On 12/08/2024 13:49, Mattias Rönnblom wrote:

Extend bitops tests to cover the
rte_bit_[test|set|clear|assign|flip]()
functions.

The tests are converted to use the test suite runner framework.

Signed-off-by: Mattias Rönnblom 
Acked-by: Morten Brørup 
Acked-by: Tyler Retzlaff 

Acked-by: Jack Bond-Preston 




Re: [PATCH v3 3/5] eal: add atomic bit operations

2024-08-12 Thread Jack Bond-Preston

On 12/08/2024 13:49, Mattias Rönnblom wrote:

Add atomic bit test/set/clear/assign/flip and
test-and-set/clear/assign/flip functions.

All atomic bit functions allow (and indeed, require) the caller to
specify a memory order.

Signed-off-by: Mattias Rönnblom 
Acked-by: Morten Brørup 
Acked-by: Tyler Retzlaff 

Acked-by: Jack Bond-Preston 



Re: [PATCH v3 4/5] eal: add unit tests for atomic bit access functions

2024-08-12 Thread Jack Bond-Preston

On 12/08/2024 13:49, Mattias Rönnblom wrote:

Extend bitops tests to cover the rte_bit_atomic_*() family of
functions.

Signed-off-by: Mattias Rönnblom 
Acked-by: Morten Brørup 
Acked-by: Tyler Retzlaff 

Acked-by: Jack Bond-Preston 




Re: [PATCH v3 5/5] eal: extend bitops to handle volatile pointers

2024-08-12 Thread Jack Bond-Preston

On 12/08/2024 13:49, Mattias Rönnblom wrote:

Have rte_bit_[test|set|clear|assign|flip]() and rte_bit_atomic_*()
handle volatile-marked pointers.

Signed-off-by: Mattias Rönnblom 
Acked-by: Morten Brørup 

Acked-by: Jack Bond-Preston 




[RFC PATCH] app/testpmd: display TM parameters when adding nodes

2024-08-12 Thread Bruce Richardson
The commands to add TM nodes and shapers take many parameters without
any descriptive words in between to identify what parameter is what. To
help make it clear what a command is actually doing, and to help catch
any issues with parameters put in the wrong order, print out the
parameters for each command after it is entered.

Signed-off-by: Bruce Richardson 
---
 app/test-pmd/cmdline_tm.c | 60 +--
 1 file changed, 57 insertions(+), 3 deletions(-)

diff --git a/app/test-pmd/cmdline_tm.c b/app/test-pmd/cmdline_tm.c
index 6ce074f538..aa917edb6c 100644
--- a/app/test-pmd/cmdline_tm.c
+++ b/app/test-pmd/cmdline_tm.c
@@ -882,8 +882,21 @@ static cmdline_parse_token_num_t cmd_add_port_tm_node_shaper_profile_packet_mode
struct cmd_add_port_tm_node_shaper_profile_result,
pkt_mode, RTE_UINT32);
 
+static int
+get_printable_rate(uint64_t rate, char *buffer, size_t size)
+{
+   if (rate >= 1000 * 1000 * 1000)
+   return snprintf(buffer, size, "%.1fG", rate / (1000 * 1000 * 1000.0));
+   else if (rate >= 1000 * 1000)
+   return snprintf(buffer, size, "%.1fM", rate / (1000 * 1000.0));
+   else if (rate >= 1000)
+   return snprintf(buffer, size, "%.1fK", rate / 1000.0);
+   else
+   return snprintf(buffer, size, "%"PRIu64, rate);
+}
+
 static void cmd_add_port_tm_node_shaper_profile_parsed(void *parsed_result,
-   __rte_unused struct cmdline *cl,
+   struct cmdline *cl,
__rte_unused void *data)
 {
struct cmd_add_port_tm_node_shaper_profile_result *res = parsed_result;
@@ -892,8 +905,20 @@ static void cmd_add_port_tm_node_shaper_profile_parsed(void *parsed_result,
uint32_t shaper_id = res->shaper_id;
uint32_t pkt_len_adjust = res->pktlen_adjust;
portid_t port_id = res->port_id;
+   char rate_str[20];
int ret;
 
+   cmdline_printf(cl, "adding node shaper on port %u, with id %u\n", res->port_id, shaper_id);
+   get_printable_rate(res->cmit_tb_rate, rate_str, sizeof(rate_str));
+   cmdline_printf(cl, "# committed rate: %s, t.b. size: %"PRIu64"\n",
+   rate_str, res->cmit_tb_size);
+
+   get_printable_rate(res->peak_tb_rate, rate_str, sizeof(rate_str));
+   cmdline_printf(cl, "# peak rate: %s, t.b. size: %"PRIu64"\n",
+   rate_str, res->peak_tb_size);
+   cmdline_printf(cl, "# pkt length adjust: %u\n", res->pktlen_adjust);
+   cmdline_printf(cl, "# packet mode: %s\n", res->pkt_mode ? "true" : "false (bytes mode)");
+
if (port_id_is_invalid(port_id, ENABLED_WARN))
return;
 
@@ -1635,7 +1660,7 @@ static cmdline_parse_token_string_t
 multi_shared_shaper_id, TOKEN_STRING_MULTI);
 
 static void cmd_add_port_tm_nonleaf_node_parsed(void *parsed_result,
-   __rte_unused struct cmdline *cl,
+   struct cmdline *cl,
__rte_unused void *data)
 {
struct cmd_add_port_tm_nonleaf_node_result *res = parsed_result;
@@ -1659,6 +1684,20 @@ static void cmd_add_port_tm_nonleaf_node_parsed(void *parsed_result,
else
parent_node_id = res->parent_node_id;
 
+   if (parent_node_id != UINT32_MAX)
+   cmdline_printf(cl, "adding node on port %u, with id %u and parent node %u\n",
+   port_id, res->node_id, parent_node_id);
+   else
+   cmdline_printf(cl, "adding node on port %u, with id %u as root node\n",
+   port_id, res->node_id);
+   cmdline_printf(cl, "# priority: %u\n", res->priority);
+   cmdline_printf(cl, "# weight: %u\n", res->weight);
+   cmdline_printf(cl, "# level_id: %u\n", res->level_id);
+   cmdline_printf(cl, "# shaper_profile_id: %d\n", res->shaper_profile_id);
+   cmdline_printf(cl, "# num SP priorities: %u\n", res->n_sp_priorities);
+   cmdline_printf(cl, "# stats_mask: %"PRIx64"\n", res->stats_mask);
+   cmdline_printf(cl, "# shared shapers: '%s'\n", s_str);
+
shared_shaper_id = (uint32_t *)malloc(MAX_NUM_SHARED_SHAPERS *
sizeof(uint32_t));
if (shared_shaper_id == NULL) {
@@ -1964,7 +2003,7 @@ static cmdline_parse_token_string_t
 multi_shared_shaper_id, TOKEN_STRING_MULTI);
 
 static void cmd_add_port_tm_leaf_node_parsed(void *parsed_result,
-   __rte_unused struct cmdline *cl,
+   struct cmdline *cl,
__rte_unused void *data)
 {
struct cmd_add_port_tm_leaf_node_result *res = parsed_result;
@@ -1988,6 +2027,21 @@ static void cmd_add_port_tm_leaf_node_parsed(void *parsed_result,
else
parent_node_id = res->parent_node_id;
 
+   if (parent_node_id != UINT32_MAX)
+   cmdline_printf(cl, "adding leaf node on port %u, with id %u and parent node %u\n",
+   port_id, res->node_id, parent_node_id);
+   else
+   cmdline_prin
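
For reference, a few hand-checked examples (hypothetical rates) of what
the get_printable_rate() helper above produces; this assumes the
function from the diff is in scope:

static void
rate_examples(void)
{
	char buf[20];

	get_printable_rate(1500000000, buf, sizeof(buf)); /* "1.5G" */
	get_printable_rate(2500000, buf, sizeof(buf));    /* "2.5M" */
	get_printable_rate(9600, buf, sizeof(buf));       /* "9.6K" */
	get_printable_rate(800, buf, sizeof(buf));        /* "800"  */
}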

[RFC PATCH] config: make queues per port a meson config option

2024-08-12 Thread Bruce Richardson
The default number of ethernet queues per port is currently set to
1k which is more than enough for most applications, but still is lower
than the total number of queues which may be available on modern NICs.
Rather than increasing the max queues further, which will increase
the memory footprint (since the value is used in array dimensioning),
we can instead make the value a meson tunable option - and reduce the
default value to 256 in the process. This means that:

* most apps which don't need hundreds of queues will see lower mem use.
* apps which do need to use thousands of queues can configure DPDK to
  allow this, without having to modify DPDK files (i.e. rte_config.h)

Signed-off-by: Bruce Richardson 
---
 config/meson.build  | 1 +
 config/rte_config.h | 1 -
 meson_options.txt   | 2 ++
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/config/meson.build b/config/meson.build
index 8c8b019c25..e9e40ce874 100644
--- a/config/meson.build
+++ b/config/meson.build
@@ -352,6 +352,7 @@ endforeach
 
 # set other values pulled from the build options
 dpdk_conf.set('RTE_MAX_ETHPORTS', get_option('max_ethports'))
+dpdk_conf.set('RTE_MAX_QUEUES_PER_PORT', get_option('max_queues_per_ethport'))
 dpdk_conf.set('RTE_LIBEAL_USE_HPET', get_option('use_hpet'))
 dpdk_conf.set('RTE_ENABLE_STDATOMIC', get_option('enable_stdatomic'))
 dpdk_conf.set('RTE_ENABLE_TRACE_FP', get_option('enable_trace_fp'))
diff --git a/config/rte_config.h b/config/rte_config.h
index dd7bb0d35b..eec1932d0f 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -64,7 +64,6 @@
 #define RTE_MBUF_DEFAULT_MEMPOOL_OPS "ring_mp_mc"
 
 /* ether defines */
-#define RTE_MAX_QUEUES_PER_PORT 1024
 #define RTE_ETHDEV_QUEUE_STAT_CNTRS 16 /* max 256 */
 #define RTE_ETHDEV_RXTX_CALLBACKS 1
 #define RTE_MAX_MULTI_HOST_CTRLS 4
diff --git a/meson_options.txt b/meson_options.txt
index e49b2fc089..e5e94dc4bf 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -44,6 +44,8 @@ option('max_lcores', type: 'string', value: 'default', description:
 'Set maximum number of cores/threads supported by EAL; "default" is different per-arch, "detect" detects the number of cores on the build machine.')
 option('max_numa_nodes', type: 'string', value: 'default', description:
 'Set the highest NUMA node supported by EAL; "default" is different per-arch, "detect" detects the highest NUMA node on the build machine.')
+option('max_queues_per_ethport', type: 'integer', value: 256, description:
+   'maximum number of queues on an Ethernet device')
 option('enable_iova_as_pa', type: 'boolean', value: true, description:
 'Support the use of physical addresses for IO addresses, such as used by UIO or VFIO in no-IOMMU mode. When disabled, DPDK can only run with IOMMU support for address mappings, but will have more space available in the mbuf structure.')
 option('mbuf_refcnt_atomic', type: 'boolean', value: true, description:
-- 
2.43.0
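
To make the footprint argument concrete: the constant dimensions
per-port queue arrays, so the configured maximum is paid even by an
application using a single queue. A simplified sketch (toy struct, not
the actual rte_ethdev internals):

#include <stdint.h>

#define RTE_MAX_QUEUES_PER_PORT 256 /* meson: -Dmax_queues_per_ethport=256 */

struct toy_eth_dev_data {
	void *rx_queues[RTE_MAX_QUEUES_PER_PORT];
	void *tx_queues[RTE_MAX_QUEUES_PER_PORT];
	/* Dropping the default from 1024 to 256 saves 768 * 8 B = 6 KiB
	 * per array, per port, on 64-bit targets. */
};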



[RFC v1 0/2] dts: initial checksum offload suite

2024-08-12 Thread Dean Marx
Test suite for verifying that checksum hardware offload through the
PMD works as expected. This is done by checking the verbose output in
testpmd while in csum forwarding mode, specifically the ol_flags
section, to ensure the reported flags match those in the test plan.
However, there are a few issues I noticed while writing the suite that
made me hesitant to submit a patch:

1. SCTP hardware offload is not supported on any of the NICs I tested
on. I've tried this on mlx5, i40e, and bnxt drivers and none of them
support it. SCTP offload is used as part of almost every test case, so I
removed SCTP packets from the suite entirely. I intend to keep it that
way unless anyone is able to use the command "csum set sctp hw 0"
without an "SCTP not supported" error.
2. There are two Tx checksum test cases, which involve checking the Tx
flags section of verbose output to ensure they match the ones in the
test plan. However, the Tx flags don't appear to change at all
depending on what packet you send to testpmd, which leaves me with no
way to verify correct behavior. I'm considering removing the Tx cases
entirely, but they are a large chunk of the suite so if anyone disagrees
I can look for more of a workaround.

If anyone has any comments or advice about the issues above it is
greatly appreciated.

Dean Marx (2):
  dts: add csum HW offload to testpmd shell
  dts: checksum offload test suite

 dts/framework/config/conf_yaml_schema.json|   3 +-
 dts/framework/remote_session/testpmd_shell.py |  94 ++
 dts/tests/TestSuite_checksum_offload.py   | 288 ++
 3 files changed, 384 insertions(+), 1 deletion(-)
 create mode 100644 dts/tests/TestSuite_checksum_offload.py

-- 
2.44.0



[PATCH v1 1/2] dts: add csum HW offload to testpmd shell

2024-08-12 Thread Dean Marx
add csum_set_hw method to testpmd shell class. Port over
set_verbose and port start/stop from queue start/stop suite.

Signed-off-by: Dean Marx 
---
 dts/framework/remote_session/testpmd_shell.py | 94 +++
 1 file changed, 94 insertions(+)

diff --git a/dts/framework/remote_session/testpmd_shell.py b/dts/framework/remote_session/testpmd_shell.py
index 43e9f56517..be8fbdc295 100644
--- a/dts/framework/remote_session/testpmd_shell.py
+++ b/dts/framework/remote_session/testpmd_shell.py
@@ -806,6 +806,100 @@ def show_port_stats(self, port_id: int) -> TestPmdPortStats:
 
 return TestPmdPortStats.parse(output)
 
+    def port_stop(self, port: int, verify: bool = True):
+        """Stop specified port.
+
+        Args:
+            port: Specifies the port number to use, must be between 0-32.
+            verify: If :data:`True`, the output of the command is scanned
+                to ensure specified port is stopped. If not, it is considered
+                an error.
+
+        Raises:
+            InteractiveCommandExecutionError: If `verify` is :data:`True` and the port
+                is not stopped."""
+        port_output = self.send_command(f"port stop {port}")
+        if verify:
+            if "Done" not in port_output:
+                self._logger.debug(f"Failed to stop port {port}: \n{port_output}")
+                raise InteractiveCommandExecutionError(f"Testpmd failed to stop port {port}.")
+
+    def port_start(self, port: int, verify: bool = True):
+        """Start specified port.
+
+        Args:
+            port: Specifies the port number to use, must be between 0-32.
+            verify: If :data:`True`, the output of the command is scanned
+                to ensure specified port is started. If not, it is considered
+                an error.
+
+        Raises:
+            InteractiveCommandExecutionError: If `verify` is :data:`True` and the port
+                is not started."""
+        port_output = self.send_command(f"port start {port}")
+        if verify:
+            if "Done" not in port_output:
+                self._logger.debug(f"Failed to start port {port}: \n{port_output}")
+                raise InteractiveCommandExecutionError(f"Testpmd failed to start port {port}.")
+
+    def csum_set_hw(self, layer: str, port_id: int, verify: bool = True) -> None:
+        """Enables hardware checksum offloading on the specified layer.
+
+        Args:
+            layer: The layer that checksum offloading should be enabled on.
+                options: tcp, ip, udp, sctp, outer-ip, outer-udp.
+            port_id: The port number to enable checksum offloading on, should be within 0-32.
+            verify: If :data:`True` the output of the command will be scanned in an attempt to
+                verify that checksum offloading was enabled on the port.
+
+        Raises:
+            InteractiveCommandExecutionError: If checksum offload is not enabled successfully.
+        """
+        csum_output = self.send_command(f"csum set {layer} hw {port_id}")
+        if (verify and ("Bad arguments" in csum_output or f"Please stop port {port_id} first")):
+            self._logger.debug(f"Failed to set csum hw mode on port {port_id}:\n{csum_output}")
+            raise InteractiveCommandExecutionError(
+                f"Failed to set csum hw mode on port {port_id}"
+            )
+        if verify and f"checksum offload is not supported by port {port_id}" in csum_output:
+            self._logger.debug(f"Checksum {layer} offload is not supported by port {port_id}:\n{csum_output}")
+
+        success = False
+        if "outer-ip" in layer:
+            if "Outer-Ip checksum offload is hw" in csum_output:
+                success = True
+        if "outer-udp" in layer:
+            if "Outer-Udp checksum offload is hw" in csum_output:
+                success = True
+        else:
+            if f"{layer.upper} checksum offload is hw" in csum_output:
+                success = True
+        if not success and verify:
+            self._logger.debug(f"Failed to set csum hw mode on port {port_id}:\n{csum_output}")
+
+    def set_verbose(self, level: int, verify: bool = True) -> None:
+        """Set debug verbosity level.
+
+        Args:
+            level: 0 - silent except for error
+                1 - fully verbose except for Tx packets
+                2 - fully verbose except for Rx packets
+                >2 - fully verbose
+            verify: if :data:`True` an additional command will be sent to verify that verbose level
+                is properly set. Defaults to :data:`True`.
+
+        Raises:
+            InteractiveCommandExecutionError: If `verify` is :data:`True` and verbose level
+                is not correctly set.
+        """
+        verbose_output = self.send_command(f"set verbose {level}")
+        if verify:
+            if "Change verbose level" not in verbose_output:
+                self._logger.debug(f"Failed to 

[PATCH v1 2/2] dts: checksum offload test suite

2024-08-12 Thread Dean Marx
test suite for verifying layer 3/4 checksum offload
features on poll mode driver.

Depends-on: patch-142762
("dts: add text parser for testpmd verbose output")
Depends-on: patch-142691
("dts: add send_packets to test suites and rework packet addressing")

Signed-off-by: Dean Marx 
---
 dts/framework/config/conf_yaml_schema.json|   3 +-
 dts/framework/remote_session/testpmd_shell.py |   2 +-
 dts/tests/TestSuite_checksum_offload.py   | 288 ++
 3 files changed, 291 insertions(+), 2 deletions(-)
 create mode 100644 dts/tests/TestSuite_checksum_offload.py

diff --git a/dts/framework/config/conf_yaml_schema.json b/dts/framework/config/conf_yaml_schema.json
index f02a310bb5..a83a6786df 100644
--- a/dts/framework/config/conf_yaml_schema.json
+++ b/dts/framework/config/conf_yaml_schema.json
@@ -187,7 +187,8 @@
   "enum": [
 "hello_world",
 "os_udp",
-"pmd_buffer_scatter"
+"pmd_buffer_scatter",
+"checksum_offload"
   ]
 },
 "test_target": {
diff --git a/dts/framework/remote_session/testpmd_shell.py b/dts/framework/remote_session/testpmd_shell.py
index be8fbdc295..01f378acc3 100644
--- a/dts/framework/remote_session/testpmd_shell.py
+++ b/dts/framework/remote_session/testpmd_shell.py
@@ -856,7 +856,7 @@ def csum_set_hw(self, layer: str, port_id: int, verify: bool = True) -> None:
             InteractiveCommandExecutionError: If checksum offload is not enabled successfully.
         """
         csum_output = self.send_command(f"csum set {layer} hw {port_id}")
-        if (verify and ("Bad arguments" in csum_output or f"Please stop port {port_id} first")):
+        if (verify and ("Bad arguments" in csum_output or f"Please stop port {port_id} first" in csum_output)):
             self._logger.debug(f"Failed to set csum hw mode on port {port_id}:\n{csum_output}")
             raise InteractiveCommandExecutionError(
                 f"Failed to set csum hw mode on port {port_id}"
diff --git a/dts/tests/TestSuite_checksum_offload.py b/dts/tests/TestSuite_checksum_offload.py
new file mode 100644
index 00..7ce5897c34
--- /dev/null
+++ b/dts/tests/TestSuite_checksum_offload.py
@@ -0,0 +1,288 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2024 University of New Hampshire
+
+"""DPDK checksum offload testing suite.
+
+This suite verifies L3/L4 checksum offload features of the Poll Mode Driver.
+On the Rx side, IPv4 and UDP/TCP checksum by hardware is checked to ensure
+checksum flags match expected flags. On the Tx side, IPv4/UDP, IPv4/TCP,
+IPv6/UDP, and IPv6/TCP insertion by hardware is checked to ensure checksum
+flags match expected flags.
+
+"""
+
+from typing import List
+
+from scapy.all import Packet  # type: ignore[import-untyped]
+from scapy.layers.inet import GRE, IP, TCP, UDP  # type: ignore[import-untyped]
+from scapy.layers.inet6 import IPv6  # type: ignore[import-untyped]
+from scapy.layers.l2 import Dot1Q  # type: ignore[import-untyped]
+from scapy.layers.l2 import Ether  # type: ignore[import-untyped]
+from scapy.layers.vxlan import VXLAN  # type: ignore[import-untyped]
+from scapy.packet import Raw  # type: ignore[import-untyped]
+
+from framework.remote_session.testpmd_shell import (
+SimpleForwardingModes,
+TestPmdShell,
+VerboseOLFlag,
+)
+from framework.test_suite import TestSuite
+
+
+class TestChecksumOffload(TestSuite):
+"""Checksum offload test suite.
+
+This suite consists of 8 test cases:
+1. Insert checksum on transmit packet
+2. Do not insert checksum on transmit packet
+3. Validate Rx checksum valid flags
+4. Hardware checksum check L4 Rx
+5. Hardware checksum check L3 Rx
+6. Hardware checksum check L4 Tx
+7. Hardware checksum check L3 Tx
+8. Checksum offload with vlan
+
+"""
+
+def set_up_suite(self) -> None:
+"""Set up the test suite.
+
+Setup:
+Verify that at least two port links are created when the
+test run is initialized.
+"""
+self.verify(len(self._port_links) > 1, "Not enough port links.")
+
+    def send_packet_and_verify(
+        self, packet_list: List[Packet], load: str, should_receive: bool
+    ) -> None:
+        """Send and verify each packet is received on the traffic generator.
+
+        Args:
+            packet_list: List of Scapy packets to send and verify.
+            load: Raw layer load attribute in the sent packet.
+            should_receive: Indicates whether the packets should be received by the traffic generator.
+        """
+        for i in range(0, len(packet_list)):
+            received_packets = self.send_packet_and_capture(packet=packet_list[i])
+            received = any(
+                packet.haslayer(Raw) and load in str(packet.load) for packet in received_packets
+            )
+            self.verify(
+                received == should_receive,
+                f"Packet was {'dropped' if should_receive else 'received'}",
+            )
+
+def 


RE: [RFC PATCH] config: make queues per port a meson config option

2024-08-12 Thread Morten Brørup
> From: Bruce Richardson [mailto:bruce.richard...@intel.com]
> 
> The default number of ethernet queues per port is currently set to
> 1k which is more than enough for most applications, but still is lower
> than the total number of queues which may be available on modern NICs.
> Rather than increasing the max queues further, which will increase
> the memory footprint (since the value is used in array dimensioning),
> we can instead make the value a meson tunable option - and reduce the
> default value to 256 in the process.

Overall, I agree that this tunable is not very exotic, and can be exposed as 
suggested.
The reduction of the default value must be mentioned in the release notes.


>  # set other values pulled from the build options
>  dpdk_conf.set('RTE_MAX_ETHPORTS', get_option('max_ethports'))
> +dpdk_conf.set('RTE_MAX_QUEUES_PER_PORT',
> get_option('max_queues_per_ethport'))

Please rename RTE_MAX_QUEUES_PER_PORT to _PER_ETHPORT, so it resembles 
MAX_ETHPORTS. For API backwards compatibility, you can add:
#define RTE_MAX_QUEUES_PER_PORT RTE_MAX_QUEUES_PER_ETHPORT


I wonder if it would be possible to have separate max sizes for RX and TX 
queues? If it can save a non-negligible amount of memory, it might be useful 
for some applications.


With suggested changes (splitting RX/TX maximums not required),
Acked-by: Morten Brørup 



Re: [RFC PATCH] config: make queues per port a meson config option

2024-08-12 Thread Bruce Richardson
On Mon, Aug 12, 2024 at 04:10:49PM +0200, Morten Brørup wrote:
> > From: Bruce Richardson [mailto:bruce.richard...@intel.com]
> > 
> > The default number of ethernet queues per port is currently set to
> > 1k which is more than enough for most applications, but still is lower
> > than the total number of queues which may be available on modern NICs.
> > Rather than increasing the max queues further, which will increase
> > the memory footprint (since the value is used in array dimensioning),
> > we can instead make the value a meson tunable option - and reduce the
> > default value to 256 in the process.
> 
> Overall, I agree that this tunable is not very exotic, and can be exposed as 
> suggested.
> The reduction of the default value must be mentioned in the release notes.
> 

Yes, good point. I'll add that in any next version.

> 
> >  # set other values pulled from the build options
> >  dpdk_conf.set('RTE_MAX_ETHPORTS', get_option('max_ethports'))
> > +dpdk_conf.set('RTE_MAX_QUEUES_PER_PORT',
> > get_option('max_queues_per_ethport'))
> 
> Please rename RTE_MAX_QUEUES_PER_PORT to _PER_ETHPORT, so it resembles 
> MAX_ETHPORTS. For API backwards compatibility, you can add:
> #define RTE_MAX_QUEUES_PER_PORT RTE_MAX_QUEUES_PER_ETHPORT
> 

Agree that would be more consistent. That would probably be best as a separate
patch, since we'd want to convert all internal use over. Will make this a
two-patch set in next version.

> 
> I wonder if it would be possible to have separate max sizes for RX and TX 
> queues? If it can save a non-negligible amount of memory, it might be useful 
> for some applications.
> 

That is an interesting idea. It would certainly allow saving memory for
use-cases where you want a large number of rx queues only, or tx queues
only. However, the defaults are still likely to remain the same. The main
issue I would have with it, is that it would mean having two build time
options rather than 1, and I'm a bit concerned at the number of options we
seem to be accumulating in DPDK.

Overall, I'm tending towards suggesting that we not do that split, but I'm
open to being convinced on it.

> 
> With suggested changes (splitting RX/TX maximums not required),
> Acked-by: Morten Brørup 
> 


[PATCH v1] dts: add verify argument to set forward mode

2024-08-12 Thread Dean Marx
Add optional verify argument to the set_forward_mode
method in testpmd shell.

Signed-off-by: Dean Marx 
---
 dts/framework/remote_session/testpmd_shell.py | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/dts/framework/remote_session/testpmd_shell.py b/dts/framework/remote_session/testpmd_shell.py
index 43e9f56517..b8cdfc01b9 100644
--- a/dts/framework/remote_session/testpmd_shell.py
+++ b/dts/framework/remote_session/testpmd_shell.py
@@ -717,11 +717,12 @@ def set_forward_mode(self, mode: SimpleForwardingModes, verify: bool = True):
 fails to update.
 """
         set_fwd_output = self.send_command(f"set fwd {mode.value}")
-        if f"Set {mode.value} packet forwarding mode" not in set_fwd_output:
-            self._logger.debug(f"Failed to set fwd mode to {mode.value}:\n{set_fwd_output}")
-            raise InteractiveCommandExecutionError(
-                f"Test pmd failed to set fwd mode to {mode.value}"
-            )
+        if verify:
+            if f"Set {mode.value} packet forwarding mode" not in set_fwd_output:
+                self._logger.debug(f"Failed to set fwd mode to {mode.value}:\n{set_fwd_output}")
+                raise InteractiveCommandExecutionError(
+                    f"Test pmd failed to set fwd mode to {mode.value}"
+                )
 
 def show_port_info_all(self) -> list[TestPmdPort]:
 """Returns the information of all the ports.
-- 
2.44.0



Re: Inquiry Regarding Sending Patches to DPDK

2024-08-12 Thread Stephen Hemminger
On Mon, 12 Aug 2024 07:52:39 +
王颢  wrote:

> Dear all,
> 
> I hope this message finds you well.
> 
> I would like to seek your advice on an issue I've encountered. Our company 
> has recently enabled two-factor authentication (2FA) for our email accounts. 
> The IT department has suggested that I abandon using the "git send-email" 
> method, as configured through git config, to send patches to DPDK. Instead, 
> they have recommended using "Exchange anonymous send mail." However, I 
> believe this approach might not be feasible.
> 
> I wanted to confirm this with you and see if you could provide any guidance 
> on the matter. I look forward to your response.
> 
> Thank you very much for your time and assistance.
> 
> Best regards,
> Howard Wang

There are two issues here:
Using git send-email is not required. You can generate patch files and
put them in your email.
BUT Microsoft Exchange does not preserve text formatting in messages.
Any patches sent that way are usually corrupted.

At Microsoft, we ended up using a special server (not Exchange) to send
Linux and DPDK patches. Or using non-corporate accounts.


RE: [RFC PATCH] config: make queues per port a meson config option

2024-08-12 Thread Morten Brørup
> From: Bruce Richardson [mailto:bruce.richard...@intel.com]
> 
> On Mon, Aug 12, 2024 at 04:10:49PM +0200, Morten Brørup wrote:
> > > From: Bruce Richardson [mailto:bruce.richard...@intel.com]
> > >
> > > The default number of ethernet queues per port is currently set to
> > > 1k which is more than enough for most applications, but still is lower
> > > than the total number of queues which may be available on modern NICs.
> > > Rather than increasing the max queues further, which will increase
> > > the memory footprint (since the value is used in array dimensioning),
> > > we can instead make the value a meson tunable option - and reduce the
> > > default value to 256 in the process.
> >
> > Overall, I agree that this tunable is not very exotic, and can be exposed as
> suggested.
> > The reduction of the default value must be mentioned in the release notes.
> >
> 
> Yes, good point. I'll add that in any next version.

ACK.

> 
> >
> > >  # set other values pulled from the build options
> > >  dpdk_conf.set('RTE_MAX_ETHPORTS', get_option('max_ethports'))
> > > +dpdk_conf.set('RTE_MAX_QUEUES_PER_PORT',
> > > get_option('max_queues_per_ethport'))
> >
> > Please rename RTE_MAX_QUEUES_PER_PORT to _PER_ETHPORT, so it resembles
> MAX_ETHPORTS. For API backwards compatibility, you can add:
> > #define RTE_MAX_QUEUES_PER_PORT RTE_MAX_QUEUES_PER_ETHPORT
> >
> 
> Agree that would be more consistent. That would probably be best as a separate
> patch, since we'd want to convert all internal use over. Will make this a
> two-patch set in next version.

ACK. And agree about two-patch series.

> 
> >
> > I wonder if it would be possible to have separate max sizes for RX and TX
> queues? If it can save a non-negligible amount of memory, it might be useful
> for some applications.
> >
> 
> That is an interesting idea. It would certainly allow saving memory for
> use-cases where you want a large number of rx queues only, or tx queues
> only. However, the defaults are still likely to remain the same. The main
> issue I would have with it, is that it would mean having two build time
> options rather than 1, and I'm a bit concerned at the number of options we
> seem to be accumulating in DPDK.
> 
> Overall, I'm tending towards suggesting that we not do that split, but I'm
> open to being convinced on it.

I would guess that many applications have an asymmetrical split of number of 
RX/TX queues. So I would argue that:
The reason to make this configurable in meson is to conserve memory, so why 
only go half the way if there is more memory to be conserved?

The distros will use oversize maximums anyway, but custom built applications 
might benefit.

Here's a weird thought:
Perhaps RX and TX maximums can be controlled individually by changing 
rte_config.h, and they can be overridden via one meson configuration parameter 
to set both (to the same value).

> 
> >
> > With suggested changes (splitting RX/TX maximums not required),
> > Acked-by: Morten Brørup 
> >

My ACK remains; splitting RX/TX maximums is not Must Have, it is Nice To Have.



[PATCH v1 0/1] dts: fix hugepage mounting

2024-08-12 Thread jspewock
From: Jeremy Spewock 

Currently in the DTS framework there are some commands used for
remounting hugepages that require super-user privileges but do not use
them, causing them to throw errors whenever they are run as a non-root
user. This patch fixes these problems by simply adding a flag that
enables admin privileges for the commands that are missing them.

Jeremy Spewock (1):
  dts: add admin privileges to hugepage mounting

 dts/framework/testbed_model/linux_session.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

-- 
2.45.2



[PATCH v1 1/1] dts: add admin privileges to hugepage mounting

2024-08-12 Thread jspewock
From: Jeremy Spewock 

There were two different commands in the hugepage mounting process that
were not using super-user privileges; one for unmounting hugepages and
another for re-mounting them. This patch adds the flag that enables
enhanced permissions for both of these actions.

Bugzilla ID: 1439
Fixes: b8bdc4c58f57 ("dts: replace pexpect with fabric")
Cc: juraj.lin...@pantheon.tech

Signed-off-by: Jeremy Spewock 
---
 dts/framework/testbed_model/linux_session.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/dts/framework/testbed_model/linux_session.py 
b/dts/framework/testbed_model/linux_session.py
index 99abc21353..544a665b83 100644
--- a/dts/framework/testbed_model/linux_session.py
+++ b/dts/framework/testbed_model/linux_session.py
@@ -123,12 +123,12 @@ def _get_numa_nodes(self) -> list[int]:
 def _mount_huge_pages(self) -> None:
 self._logger.info("Re-mounting Hugepages.")
 hugapge_fs_cmd = "awk '/hugetlbfs/ { print $2 }' /proc/mounts"
-self.send_command(f"umount $({hugapge_fs_cmd})")
+self.send_command(f"umount $({hugapge_fs_cmd})", privileged=True)
 result = self.send_command(hugapge_fs_cmd)
 if result.stdout == "":
 remote_mount_path = "/mnt/huge"
-self.send_command(f"mkdir -p {remote_mount_path}")
-self.send_command(f"mount -t hugetlbfs nodev {remote_mount_path}")
+self.send_command(f"mkdir -p {remote_mount_path}", privileged=True)
+self.send_command(f"mount -t hugetlbfs nodev {remote_mount_path}", 
privileged=True)
 
 def _supports_numa(self) -> bool:
 # the system supports numa if self._numa_nodes is non-empty and there 
are more
-- 
2.45.2



Re: [RFC PATCH] config: make queues per port a meson config option

2024-08-12 Thread Bruce Richardson
On Mon, Aug 12, 2024 at 05:02:11PM +0200, Morten Brørup wrote:
> > From: Bruce Richardson [mailto:bruce.richard...@intel.com]
> > 
> > On Mon, Aug 12, 2024 at 04:10:49PM +0200, Morten Brørup wrote:
> > > > From: Bruce Richardson [mailto:bruce.richard...@intel.com]
> > > >
> > > > The default number of ethernet queues per port is currently set to
> > > > 1k which is more than enough for most applications, but still is lower
> > > > than the total number of queues which may be available on modern NICs.
> > > > Rather than increasing the max queues further, which will increase
> > > > the memory footprint (since the value is used in array dimensioning),
> > > > we can instead make the value a meson tunable option - and reduce the
> > > > default value to 256 in the process.
> > >
> > > Overall, I agree that this tunable is not very exotic, and can be exposed 
> > > as
> > suggested.
> > > The reduction of the default value must be mentioned in the release notes.
> > >
> > 
> > Yes, good point. I'll add that in any next version.
> 
> ACK.
> 
> > 
> > >
> > > >  # set other values pulled from the build options
> > > >  dpdk_conf.set('RTE_MAX_ETHPORTS', get_option('max_ethports'))
> > > > +dpdk_conf.set('RTE_MAX_QUEUES_PER_PORT',
> > > > get_option('max_queues_per_ethport'))
> > >
> > > Please rename RTE_MAX_QUEUES_PER_PORT to _PER_ETHPORT, so it resembles
> > MAX_ETHPORTS. For API backwards compatibility, you can add:
> > > #define RTE_MAX_QUEUES_PER_PORT RTE_MAX_QUEUES_PER_ETHPORT
> > >
> > 
> > Agree that would be more consistent. That would probably best be a separate
> > patch, since we'd want to convert all internal use over. Will make this a
> > two-patch set in next version.
> 
> ACK. And agree about two-patch series.
> 
> > 
> > >
> > > I wonder if it would be possible to have separate max sizes for RX and TX
> > queues? If it can save a non-negligible amount of memory, it might be useful
> > for some applications.
> > >
> > 
> > That is an interesting idea. It would certainly allow saving memory for
> > use-cases where you want a large number of rx queues only, or tx queues
> > only. However, the defaults are still likely to remain the same. The main
> > issue I would have with it, is that it would mean having two build time
> > options rather than 1, and I'm a bit concerned at the number of options we
> > seem to be accumulating in DPDK.
> > 
> > Overall, I'm tending towards suggesting that we not do that split, but I'm
> > open to being convinced on it.
> 
> I would guess that many applications have an asymmetrical split of number of 
> RX/TX queues. So I would argue that:
> The reason to make this configurable in meson is to conserve memory, so why 
> only go half the way if there is more memory to be conserved?
> 
> The distros will use oversize maximums anyway, but custom built applications 
> might benefit.
> 
> Here's a weird thought:
> Perhaps RX and TX maximums can be controlled individually by changing 
> rte_config.h, and they can be overridden via one meson configuration 
> parameter to set both (to the same value).
> 
> > 
> > >
> > > With suggested changes (splitting RX/TX maximums not required),
> > > Acked-by: Morten Brørup 
> > >
> 
> My ACK remains; splitting RX/TX maximums is not Must Have, it is Nice To Have.
>
Let me see how much is involved in trying to split... 
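
To illustrate the idea being discussed, here is a minimal sketch of what the
rte_config.h side of such a split might look like; every macro and option
name below is hypothetical, not taken from any posted patch:

    /* Per-direction queue maximums, with defaults that a single
     * meson-supplied value can override for both directions at once.
     */
    #ifdef RTE_MAX_QUEUES_PER_ETHPORT  /* set via the meson option, if given */
    #define RTE_MAX_RXQUEUES_PER_ETHPORT RTE_MAX_QUEUES_PER_ETHPORT
    #define RTE_MAX_TXQUEUES_PER_ETHPORT RTE_MAX_QUEUES_PER_ETHPORT
    #else
    #define RTE_MAX_RXQUEUES_PER_ETHPORT 256
    #define RTE_MAX_TXQUEUES_PER_ETHPORT 256
    #endif

Arrays dimensioned per direction would then use the per-direction macros, so
a build needing many RX queues but few TX queues would not pay the memory
cost twice.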


Re: [PATCH v1 1/1] dts: add admin privileges to hugepage mounting

2024-08-12 Thread Nicholas Pratte
Reviewed-by: Nicholas Pratte 

On Mon, Aug 12, 2024 at 11:04 AM  wrote:
>
> From: Jeremy Spewock 
>
> There were two different commands in the hugepage mounting process that
> were not using super-user privileges; one for unmounting hugepages and
> another for re-mounting them. This patch adds the flag that enables
> enhanced permissions for both of these actions.
>
> Bugzilla ID: 1439
> Fixes: b8bdc4c58f57 ("dts: replace pexpect with fabric")
> Cc: juraj.lin...@pantheon.tech
>
> Signed-off-by: Jeremy Spewock 
> ---
>  dts/framework/testbed_model/linux_session.py | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/dts/framework/testbed_model/linux_session.py 
> b/dts/framework/testbed_model/linux_session.py
> index 99abc21353..544a665b83 100644
> --- a/dts/framework/testbed_model/linux_session.py
> +++ b/dts/framework/testbed_model/linux_session.py
> @@ -123,12 +123,12 @@ def _get_numa_nodes(self) -> list[int]:
>  def _mount_huge_pages(self) -> None:
>  self._logger.info("Re-mounting Hugepages.")
>  hugapge_fs_cmd = "awk '/hugetlbfs/ { print $2 }' /proc/mounts"
> -self.send_command(f"umount $({hugapge_fs_cmd})")
> +self.send_command(f"umount $({hugapge_fs_cmd})", privileged=True)
>  result = self.send_command(hugapge_fs_cmd)
>  if result.stdout == "":
>  remote_mount_path = "/mnt/huge"
> -self.send_command(f"mkdir -p {remote_mount_path}")
> -self.send_command(f"mount -t hugetlbfs nodev 
> {remote_mount_path}")
> +self.send_command(f"mkdir -p {remote_mount_path}", 
> privileged=True)
> +self.send_command(f"mount -t hugetlbfs nodev 
> {remote_mount_path}", privileged=True)
>
>  def _supports_numa(self) -> bool:
>  # the system supports numa if self._numa_nodes is non-empty and 
> there are more
> --
> 2.45.2
>


[PATCH v3 00/16] Improve rte_tm support in ICE driver

2024-08-12 Thread Bruce Richardson
This patchset expands the capabilities of the traffic management
support in the ICE driver. It allows the driver to support different
sizes of topologies, and support >256 queues and more than 3 hierarchy
layers.

---
Depends-on: series-32719 ("improve rte_tm APIs")

v3:
* remove/implement some code TODOs
* add patch 16 to set.

v2:
* Correct typo in commit log of one patch
* Add missing depends-on tag to the cover letter

Bruce Richardson (16):
  net/ice: add traffic management node query function
  net/ice: detect stopping a flow-director queue twice
  net/ice: improve Tx scheduler graph output
  net/ice: add option to choose DDP package file
  net/ice: add option to download scheduler topology
  net/ice/base: allow init without TC class sched nodes
  net/ice/base: set VSI index on newly created nodes
  net/ice/base: read VSI layer info from VSI
  net/ice/base: remove 255 limit on sched child nodes
  net/ice/base: optimize subtree searches
  net/ice/base: make functions non-static
  net/ice/base: remove flag checks before topology upload
  net/ice: limit the number of queues to sched capabilities
  net/ice: enhance Tx scheduler hierarchy support
  net/ice: add minimal capability reporting API
  net/ice: do early check on node level when adding

 doc/guides/nics/ice.rst  |   9 +
 drivers/net/ice/base/ice_ddp.c   |  51 +--
 drivers/net/ice/base/ice_ddp.h   |   4 +-
 drivers/net/ice/base/ice_sched.c |  56 +--
 drivers/net/ice/base/ice_sched.h |   8 +
 drivers/net/ice/base/ice_type.h  |   3 +-
 drivers/net/ice/ice_diagnose.c   | 196 ---
 drivers/net/ice/ice_ethdev.c |  92 +++--
 drivers/net/ice/ice_ethdev.h |  18 +-
 drivers/net/ice/ice_rxtx.c   |  15 +
 drivers/net/ice/ice_tm.c | 574 +++
 11 files changed, 497 insertions(+), 529 deletions(-)

--
2.43.0



[PATCH v3 01/16] net/ice: add traffic management node query function

2024-08-12 Thread Bruce Richardson
Implement the new node querying function for the "ice" net driver.

Signed-off-by: Bruce Richardson 
---
 drivers/net/ice/ice_tm.c | 48 
 1 file changed, 48 insertions(+)

diff --git a/drivers/net/ice/ice_tm.c b/drivers/net/ice/ice_tm.c
index 8a29a9e744..459446a6b0 100644
--- a/drivers/net/ice/ice_tm.c
+++ b/drivers/net/ice/ice_tm.c
@@ -17,6 +17,11 @@ static int ice_tm_node_add(struct rte_eth_dev *dev, uint32_t 
node_id,
  uint32_t weight, uint32_t level_id,
  const struct rte_tm_node_params *params,
  struct rte_tm_error *error);
+static int ice_node_query(const struct rte_eth_dev *dev, uint32_t node_id,
+   uint32_t *parent_node_id, uint32_t *priority,
+   uint32_t *weight, uint32_t *level_id,
+   struct rte_tm_node_params *params,
+   struct rte_tm_error *error);
 static int ice_tm_node_delete(struct rte_eth_dev *dev, uint32_t node_id,
struct rte_tm_error *error);
 static int ice_node_type_get(struct rte_eth_dev *dev, uint32_t node_id,
@@ -35,6 +40,7 @@ const struct rte_tm_ops ice_tm_ops = {
.node_add = ice_tm_node_add,
.node_delete = ice_tm_node_delete,
.node_type_get = ice_node_type_get,
+   .node_query = ice_node_query,
.hierarchy_commit = ice_hierarchy_commit,
 };
 
@@ -219,6 +225,48 @@ ice_node_type_get(struct rte_eth_dev *dev, uint32_t 
node_id,
return 0;
 }
 
+static int
+ice_node_query(const struct rte_eth_dev *dev, uint32_t node_id,
+   uint32_t *parent_node_id, uint32_t *priority,
+   uint32_t *weight, uint32_t *level_id,
+   struct rte_tm_node_params *params,
+   struct rte_tm_error *error)
+{
+   struct ice_pf *pf = ICE_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+   struct ice_tm_node *tm_node;
+
+   if (node_id == RTE_TM_NODE_ID_NULL) {
+   error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+   error->message = "invalid node id";
+   return -EINVAL;
+   }
+
+   /* check if the node id exists */
+   tm_node = find_node(pf->tm_conf.root, node_id);
+   if (!tm_node) {
+   error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+   error->message = "no such node";
+   return -EEXIST;
+   }
+
+   if (parent_node_id != NULL)
+   *parent_node_id = tm_node->parent->id;
+
+   if (priority != NULL)
+   *priority = tm_node->priority;
+
+   if (weight != NULL)
+   *weight = tm_node->weight;
+
+   if (level_id != NULL)
+   *level_id = tm_node->level;
+
+   if (params != NULL)
+   *params = tm_node->params;
+
+   return 0;
+}
+
 static inline struct ice_tm_shaper_profile *
 ice_shaper_profile_search(struct rte_eth_dev *dev,
   uint32_t shaper_profile_id)
-- 
2.43.0



[PATCH v3 02/16] net/ice: detect stopping a flow-director queue twice

2024-08-12 Thread Bruce Richardson
If the flow-director queue is stopped at some point during the running
of an application, the shutdown procedure for the port issues an error
as it tries to stop the queue a second time, and fails to do so. We can
eliminate this error by setting the tail-register pointer to NULL on
stop, and checking for that condition in subsequent stop calls. Since
the register pointer is set on start, any restarting of the queue will
allow a stop call to progress as normal.

Signed-off-by: Bruce Richardson 
---
 drivers/net/ice/ice_rxtx.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c
index f270498ed1..a150d28e73 100644
--- a/drivers/net/ice/ice_rxtx.c
+++ b/drivers/net/ice/ice_rxtx.c
@@ -1139,6 +1139,10 @@ ice_fdir_tx_queue_stop(struct rte_eth_dev *dev, uint16_t 
tx_queue_id)
tx_queue_id);
return -EINVAL;
}
+   if (txq->qtx_tail == NULL) {
+   PMD_DRV_LOG(INFO, "TX queue %u not started\n", tx_queue_id);
+   return 0;
+   }
vsi = txq->vsi;
 
q_ids[0] = txq->reg_idx;
@@ -1153,6 +1157,7 @@ ice_fdir_tx_queue_stop(struct rte_eth_dev *dev, uint16_t 
tx_queue_id)
}
 
txq->tx_rel_mbufs(txq);
+   txq->qtx_tail = NULL;
 
return 0;
 }
-- 
2.43.0



[PATCH v3 03/16] net/ice: improve Tx scheduler graph output

2024-08-12 Thread Bruce Richardson
The function to dump the TX scheduler topology only adds to the chart
nodes connected to TX queues or for the flow director VSI. Change the
function to work recursively from the root node and thereby include all
scheduler nodes, whether in use or not, in the dump.

Also, improve the output of the Tx scheduler graphing function:

* Add VSI details to each node in graph
* When number of children is >16, skip middle nodes to reduce size of
  the graph, otherwise dot output is unviewable for large hierarchies
* For VSIs other than zero, use dot's clustering method to put those
  VSIs into subgraphs with borders
* For leaf nodes, display queue numbers for any nodes assigned to
  ethdev NIC Tx queues

Signed-off-by: Bruce Richardson 
---
 drivers/net/ice/ice_diagnose.c | 196 -
 1 file changed, 69 insertions(+), 127 deletions(-)

diff --git a/drivers/net/ice/ice_diagnose.c b/drivers/net/ice/ice_diagnose.c
index c357554707..623d84e37d 100644
--- a/drivers/net/ice/ice_diagnose.c
+++ b/drivers/net/ice/ice_diagnose.c
@@ -545,29 +545,15 @@ static void print_rl_profile(struct 
ice_aqc_rl_profile_elem *prof,
fprintf(stream, "\t\t\t\t\t\n");
 }
 
-static
-void print_elem_type(FILE *stream, u8 type)
+static const char *
+get_elem_type(u8 type)
 {
-   switch (type) {
-   case 1:
-   fprintf(stream, "root");
-   break;
-   case 2:
-   fprintf(stream, "tc");
-   break;
-   case 3:
-   fprintf(stream, "se_generic");
-   break;
-   case 4:
-   fprintf(stream, "entry_point");
-   break;
-   case 5:
-   fprintf(stream, "leaf");
-   break;
-   default:
-   fprintf(stream, "%d", type);
-   break;
-   }
+   static const char * const ice_sched_node_types[] = {
+   "Undefined", "Root", "TC", "SE Generic", "SW Entry", 
"Leaf"
+   };
+   if (type < RTE_DIM(ice_sched_node_types))
+   return ice_sched_node_types[type];
+   return "*UNKNOWN*";
 }
 
 static
@@ -602,7 +588,9 @@ void print_priority_mode(FILE *stream, bool flag)
 }
 
 static
-void print_node(struct ice_aqc_txsched_elem_data *data,
+void print_node(struct ice_sched_node *node,
+   struct rte_eth_dev_data *ethdata,
+   struct ice_aqc_txsched_elem_data *data,
struct ice_aqc_rl_profile_elem *cir_prof,
struct ice_aqc_rl_profile_elem *eir_prof,
struct ice_aqc_rl_profile_elem *shared_prof,
@@ -613,17 +601,19 @@ void print_node(struct ice_aqc_txsched_elem_data *data,
 
fprintf(stream, "\t\t\t\n");
 
-   fprintf(stream, "\t\t\t\t\n");
-   fprintf(stream, "\t\t\t\t\t teid \n");
-   fprintf(stream, "\t\t\t\t\t %d \n", data->node_teid);
-   fprintf(stream, "\t\t\t\t\n");
-
-   fprintf(stream, "\t\t\t\t\n");
-   fprintf(stream, "\t\t\t\t\t type \n");
-   fprintf(stream, "\t\t\t\t\t");
-   print_elem_type(stream, data->data.elem_type);
-   fprintf(stream, "\n");
-   fprintf(stream, "\t\t\t\t\n");
+   fprintf(stream, "\t\t\t\tteid%d\n", 
data->node_teid);
+   fprintf(stream, "\t\t\t\ttype%s\n",
+   get_elem_type(data->data.elem_type));
+   fprintf(stream, "\t\t\t\tVSI%u\n", 
node->vsi_handle);
+   if (data->data.elem_type == ICE_AQC_ELEM_TYPE_LEAF) {
+   for (uint16_t i = 0; i < ethdata->nb_tx_queues; i++) {
+   struct ice_tx_queue *q = ethdata->tx_queues[i];
+   if (q->q_teid == data->node_teid) {
+   fprintf(stream, 
"\t\t\t\tTXQ%u\n", i);
+   break;
+   }
+   }
+   }
 
if (!detail)
goto brief;
@@ -705,8 +695,6 @@ void print_node(struct ice_aqc_txsched_elem_data *data,
fprintf(stream, "\t\tshape=plain\n");
fprintf(stream, "\t]\n");
 
-   if (data->parent_teid != 0x)
-   fprintf(stream, "\tNODE_%d -> NODE_%d\n", data->parent_teid, 
data->node_teid);
 }
 
 static
@@ -731,112 +719,92 @@ int query_rl_profile(struct ice_hw *hw,
return 0;
 }
 
-static
-int query_node(struct ice_hw *hw, uint32_t child, uint32_t *parent,
-  uint8_t level, bool detail, FILE *stream)
+static int
+query_node(struct ice_hw *hw, struct rte_eth_dev_data *ethdata,
+   struct ice_sched_node *node, bool detail, FILE *stream)
 {
-   struct ice_aqc_txsched_elem_data data;
+   struct ice_aqc_txsched_elem_data *data = &node->info;
struct ice_aqc_rl_profile_elem cir_prof;
struct ice_aqc_rl_profile_elem eir_prof;
struct ice_aqc_rl_profile_elem shared_prof;
struct ice_aqc_rl_profile_elem *cp = NULL;
struct ice_aqc_rl_profile_elem *ep = NULL;
struct ice_aqc_rl_profile_elem *sp = NULL;
-   int status, ret;
-
-   stat

[PATCH v3 04/16] net/ice: add option to choose DDP package file

2024-08-12 Thread Bruce Richardson
The "Dynamic Device Personalization" package is loaded at initialization
time by the driver, but the specific package file loaded depends upon
what package file is found first by searching through a hard-coded list
of firmware paths. To enable greater control over the package loading,
we can add a device option to choose a specific DDP package file to
load.

Signed-off-by: Bruce Richardson 
---
 doc/guides/nics/ice.rst  |  9 +
 drivers/net/ice/ice_ethdev.c | 34 ++
 drivers/net/ice/ice_ethdev.h |  1 +
 3 files changed, 44 insertions(+)

diff --git a/doc/guides/nics/ice.rst b/doc/guides/nics/ice.rst
index ae975d19ad..58ccfbd1a5 100644
--- a/doc/guides/nics/ice.rst
+++ b/doc/guides/nics/ice.rst
@@ -108,6 +108,15 @@ Runtime Configuration
 
 -a 80:00.0,default-mac-disable=1
 
+- ``DDP Package File``
+
+  Rather than have the driver search for the DDP package to load,
+  or to override what package is used,
+  the ``ddp_pkg_file`` option can be used to provide the path to a specific 
package file.
+  For example::
+
+-a 80:00.0,ddp_pkg_file=/path/to/ice-version.pkg
+
 - ``Protocol extraction for per queue``
 
   Configure the RX queues to do protocol extraction into mbuf for protocol
diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c
index 304f959b7e..3e7ceda9ce 100644
--- a/drivers/net/ice/ice_ethdev.c
+++ b/drivers/net/ice/ice_ethdev.c
@@ -36,6 +36,7 @@
 #define ICE_ONE_PPS_OUT_ARG   "pps_out"
 #define ICE_RX_LOW_LATENCY_ARG"rx_low_latency"
 #define ICE_MBUF_CHECK_ARG   "mbuf_check"
+#define ICE_DDP_FILENAME  "ddp_pkg_file"
 
 #define ICE_CYCLECOUNTER_MASK  0xULL
 
@@ -52,6 +53,7 @@ static const char * const ice_valid_args[] = {
ICE_RX_LOW_LATENCY_ARG,
ICE_DEFAULT_MAC_DISABLE,
ICE_MBUF_CHECK_ARG,
+   ICE_DDP_FILENAME,
NULL
 };
 
@@ -692,6 +694,18 @@ handle_field_name_arg(__rte_unused const char *key, const 
char *value,
return 0;
 }
 
+static int
+handle_ddp_filename_arg(__rte_unused const char *key, const char *value, void 
*name_args)
+{
+   const char **filename = name_args;
+   if (strlen(value) >= ICE_MAX_PKG_FILENAME_SIZE) {
+   PMD_DRV_LOG(ERR, "The DDP package filename is too long : '%s'", 
value);
+   return -1;
+   }
+   *filename = strdup(value);
+   return 0;
+}
+
 static void
 ice_check_proto_xtr_support(struct ice_hw *hw)
 {
@@ -1882,6 +1896,16 @@ int ice_load_pkg(struct ice_adapter *adapter, bool 
use_dsn, uint64_t dsn)
size_t bufsz;
int err;
 
+   if (adapter->devargs.ddp_filename != NULL) {
+   strlcpy(pkg_file, adapter->devargs.ddp_filename, 
sizeof(pkg_file));
+   if (rte_firmware_read(pkg_file, &buf, &bufsz) == 0) {
+   goto load_fw;
+   } else {
+   PMD_INIT_LOG(ERR, "Cannot load DDP file: %s\n", 
pkg_file);
+   return -1;
+   }
+   }
+
if (!use_dsn)
goto no_dsn;
 
@@ -2216,6 +2240,13 @@ static int ice_parse_devargs(struct rte_eth_dev *dev)
 
ret = rte_kvargs_process(kvlist, ICE_RX_LOW_LATENCY_ARG,
 &parse_bool, &ad->devargs.rx_low_latency);
+   if (ret)
+   goto bail;
+
+   ret = rte_kvargs_process(kvlist, ICE_DDP_FILENAME,
+&handle_ddp_filename_arg, 
&ad->devargs.ddp_filename);
+   if (ret)
+   goto bail;
 
 bail:
rte_kvargs_free(kvlist);
@@ -2689,6 +2720,8 @@ ice_dev_close(struct rte_eth_dev *dev)
ice_free_hw_tbls(hw);
rte_free(hw->port_info);
hw->port_info = NULL;
+   free((void *)(uintptr_t)ad->devargs.ddp_filename);
+   ad->devargs.ddp_filename = NULL;
ice_shutdown_all_ctrlq(hw, true);
rte_free(pf->proto_xtr);
pf->proto_xtr = NULL;
@@ -6981,6 +7014,7 @@ RTE_PMD_REGISTER_PARAM_STRING(net_ice,
  ICE_PROTO_XTR_ARG 
"=[queue:]"
  ICE_SAFE_MODE_SUPPORT_ARG "=<0|1>"
  ICE_DEFAULT_MAC_DISABLE "=<0|1>"
+ ICE_DDP_FILENAME "="
  ICE_RX_LOW_LATENCY_ARG "=<0|1>");
 
 RTE_LOG_REGISTER_SUFFIX(ice_logtype_init, init, NOTICE);
diff --git a/drivers/net/ice/ice_ethdev.h b/drivers/net/ice/ice_ethdev.h
index 3ea9f37dc8..c211b5b9cc 100644
--- a/drivers/net/ice/ice_ethdev.h
+++ b/drivers/net/ice/ice_ethdev.h
@@ -568,6 +568,7 @@ struct ice_devargs {
/* Name of the field. */
char xtr_field_name[RTE_MBUF_DYN_NAMESIZE];
uint64_t mbuf_check;
+   const char *ddp_filename;
 };
 
 /**
-- 
2.43.0



[PATCH v3 05/16] net/ice: add option to download scheduler topology

2024-08-12 Thread Bruce Richardson
The DDP package file being loaded at init time may contain an
alternative Tx Scheduler topology in it. Add driver option to load this
topology at init time.

Signed-off-by: Bruce Richardson 
---
 drivers/net/ice/base/ice_ddp.c | 18 +++---
 drivers/net/ice/base/ice_ddp.h |  4 ++--
 drivers/net/ice/ice_ethdev.c   | 24 +++-
 drivers/net/ice/ice_ethdev.h   |  1 +
 4 files changed, 33 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ice/base/ice_ddp.c b/drivers/net/ice/base/ice_ddp.c
index 24506dfaea..e6c42c5274 100644
--- a/drivers/net/ice/base/ice_ddp.c
+++ b/drivers/net/ice/base/ice_ddp.c
@@ -1326,7 +1326,7 @@ ice_fill_hw_ptype(struct ice_hw *hw)
  * ice_copy_and_init_pkg() instead of directly calling ice_init_pkg() in this
  * case.
  */
-enum ice_ddp_state ice_init_pkg(struct ice_hw *hw, u8 *buf, u32 len)
+enum ice_ddp_state ice_init_pkg(struct ice_hw *hw, u8 *buf, u32 len, bool 
load_sched)
 {
bool already_loaded = false;
enum ice_ddp_state state;
@@ -1344,6 +1344,18 @@ enum ice_ddp_state ice_init_pkg(struct ice_hw *hw, u8 
*buf, u32 len)
return state;
}
 
+   if (load_sched) {
+   enum ice_status res = ice_cfg_tx_topo(hw, buf, len);
+   if (res != ICE_SUCCESS) {
+   ice_debug(hw, ICE_DBG_INIT, "failed to apply sched 
topology  (err: %d)\n",
+   res);
+   return ICE_DDP_PKG_ERR;
+   }
+   ice_debug(hw, ICE_DBG_INIT, "Topology download successful, 
reinitializing device\n");
+   ice_deinit_hw(hw);
+   ice_init_hw(hw);
+   }
+
/* initialize package info */
state = ice_init_pkg_info(hw, pkg);
if (state)
@@ -1416,7 +1428,7 @@ enum ice_ddp_state ice_init_pkg(struct ice_hw *hw, u8 
*buf, u32 len)
  * related routines.
  */
 enum ice_ddp_state
-ice_copy_and_init_pkg(struct ice_hw *hw, const u8 *buf, u32 len)
+ice_copy_and_init_pkg(struct ice_hw *hw, const u8 *buf, u32 len, bool 
load_sched)
 {
enum ice_ddp_state state;
u8 *buf_copy;
@@ -1426,7 +1438,7 @@ ice_copy_and_init_pkg(struct ice_hw *hw, const u8 *buf, 
u32 len)
 
buf_copy = (u8 *)ice_memdup(hw, buf, len, ICE_NONDMA_TO_NONDMA);
 
-   state = ice_init_pkg(hw, buf_copy, len);
+   state = ice_init_pkg(hw, buf_copy, len, load_sched);
if (!ice_is_init_pkg_successful(state)) {
/* Free the copy, since we failed to initialize the package */
ice_free(hw, buf_copy);
diff --git a/drivers/net/ice/base/ice_ddp.h b/drivers/net/ice/base/ice_ddp.h
index 5761920207..2feba2e91d 100644
--- a/drivers/net/ice/base/ice_ddp.h
+++ b/drivers/net/ice/base/ice_ddp.h
@@ -451,9 +451,9 @@ ice_pkg_enum_entry(struct ice_seg *ice_seg, struct 
ice_pkg_enum *state,
 void *
 ice_pkg_enum_section(struct ice_seg *ice_seg, struct ice_pkg_enum *state,
 u32 sect_type);
-enum ice_ddp_state ice_init_pkg(struct ice_hw *hw, u8 *buff, u32 len);
+enum ice_ddp_state ice_init_pkg(struct ice_hw *hw, u8 *buff, u32 len, bool 
load_sched);
 enum ice_ddp_state
-ice_copy_and_init_pkg(struct ice_hw *hw, const u8 *buf, u32 len);
+ice_copy_and_init_pkg(struct ice_hw *hw, const u8 *buf, u32 len, bool 
load_sched);
 bool ice_is_init_pkg_successful(enum ice_ddp_state state);
 void ice_free_seg(struct ice_hw *hw);
 
diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c
index 3e7ceda9ce..0d2445a317 100644
--- a/drivers/net/ice/ice_ethdev.c
+++ b/drivers/net/ice/ice_ethdev.c
@@ -37,6 +37,7 @@
 #define ICE_RX_LOW_LATENCY_ARG"rx_low_latency"
 #define ICE_MBUF_CHECK_ARG   "mbuf_check"
 #define ICE_DDP_FILENAME  "ddp_pkg_file"
+#define ICE_DDP_LOAD_SCHED"ddp_load_sched_topo"
 
 #define ICE_CYCLECOUNTER_MASK  0xULL
 
@@ -54,6 +55,7 @@ static const char * const ice_valid_args[] = {
ICE_DEFAULT_MAC_DISABLE,
ICE_MBUF_CHECK_ARG,
ICE_DDP_FILENAME,
+   ICE_DDP_LOAD_SCHED,
NULL
 };
 
@@ -1938,7 +1940,7 @@ int ice_load_pkg(struct ice_adapter *adapter, bool 
use_dsn, uint64_t dsn)
 load_fw:
PMD_INIT_LOG(DEBUG, "DDP package name: %s", pkg_file);
 
-   err = ice_copy_and_init_pkg(hw, buf, bufsz);
+   err = ice_copy_and_init_pkg(hw, buf, bufsz, 
adapter->devargs.ddp_load_sched);
if (!ice_is_init_pkg_successful(err)) {
PMD_INIT_LOG(ERR, "ice_copy_and_init_hw failed: %d\n", err);
free(buf);
@@ -1971,19 +1973,18 @@ static int
 parse_bool(const char *key, const char *value, void *args)
 {
int *i = (int *)args;
-   char *end;
-   int num;
 
-   num = strtoul(value, &end, 10);
-
-   if (num != 0 && num != 1) {
-   PMD_DRV_LOG(WARNING, "invalid value:\"%s\" for key:\"%s\", "
-   "value must be 0 or 1",
+   if (value == NULL || value[0] == '\0') {
+   PMD_DRV_LOG(WARNING, "key:\

[PATCH v3 06/16] net/ice/base: allow init without TC class sched nodes

2024-08-12 Thread Bruce Richardson
If DCB support is disabled via DDP image, there will not be any traffic
class (TC) nodes in the scheduler tree immediately above the root level.
To allow the driver to work with this scenario, we allow use of the root
node as a dummy TC0 node in the case where there are no TC nodes in the
tree. For any TC other than 0 (used by default in the
driver), the existing behaviour of returning a NULL pointer is maintained.

Signed-off-by: Bruce Richardson 
---
 drivers/net/ice/base/ice_sched.c | 6 ++
 drivers/net/ice/base/ice_type.h  | 1 +
 2 files changed, 7 insertions(+)

diff --git a/drivers/net/ice/base/ice_sched.c b/drivers/net/ice/base/ice_sched.c
index 373c32a518..f75e5ae599 100644
--- a/drivers/net/ice/base/ice_sched.c
+++ b/drivers/net/ice/base/ice_sched.c
@@ -292,6 +292,10 @@ struct ice_sched_node *ice_sched_get_tc_node(struct 
ice_port_info *pi, u8 tc)
 
if (!pi || !pi->root)
return NULL;
+   /* if no TC nodes, use root as TC node 0 */
+   if (pi->has_tc == 0)
+   return tc == 0 ? pi->root : NULL;
+
for (i = 0; i < pi->root->num_children; i++)
if (pi->root->children[i]->tc_num == tc)
return pi->root->children[i];
@@ -1306,6 +1310,8 @@ int ice_sched_init_port(struct ice_port_info *pi)
ICE_AQC_ELEM_TYPE_ENTRY_POINT)
hw->sw_entry_point_layer = j;
 
+   if (buf[0].generic[j].data.elem_type == 
ICE_AQC_ELEM_TYPE_TC)
+   pi->has_tc = 1;
status = ice_sched_add_node(pi, j, &buf[i].generic[j], 
NULL);
if (status)
goto err_init_port;
diff --git a/drivers/net/ice/base/ice_type.h b/drivers/net/ice/base/ice_type.h
index 598a80155b..a70e4a8afa 100644
--- a/drivers/net/ice/base/ice_type.h
+++ b/drivers/net/ice/base/ice_type.h
@@ -1260,6 +1260,7 @@ struct ice_port_info {
struct ice_qos_cfg qos_cfg;
u8 is_vf:1;
u8 is_custom_tx_enabled:1;
+   u8 has_tc:1;
 };
 
 struct ice_switch_info {
-- 
2.43.0



[PATCH v3 07/16] net/ice/base: set VSI index on newly created nodes

2024-08-12 Thread Bruce Richardson
The ice_sched_node type has a field for the VSI to which the node
belongs. This field was not getting set in "ice_sched_add_node", so add
a line configuring this field for each node from its parent node.
Similarly, when searching for a qgroup node, we can check for each node
that the VSI information is correct.

Signed-off-by: Bruce Richardson 
---
 drivers/net/ice/base/ice_sched.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ice/base/ice_sched.c b/drivers/net/ice/base/ice_sched.c
index f75e5ae599..f6dc5ae173 100644
--- a/drivers/net/ice/base/ice_sched.c
+++ b/drivers/net/ice/base/ice_sched.c
@@ -200,6 +200,7 @@ ice_sched_add_node(struct ice_port_info *pi, u8 layer,
node->in_use = true;
node->parent = parent;
node->tx_sched_layer = layer;
+   node->vsi_handle = parent->vsi_handle;
parent->children[parent->num_children++] = node;
node->info = elem;
return 0;
@@ -1581,7 +1582,7 @@ ice_sched_get_free_qparent(struct ice_port_info *pi, u16 
vsi_handle, u8 tc,
/* make sure the qgroup node is part of the VSI subtree */
if (ice_sched_find_node_in_subtree(pi->hw, vsi_node, qgrp_node))
if (qgrp_node->num_children < max_children &&
-   qgrp_node->owner == owner)
+   qgrp_node->owner == owner && qgrp_node->vsi_handle 
== vsi_handle)
break;
qgrp_node = qgrp_node->sibling;
}
-- 
2.43.0



[PATCH v3 08/16] net/ice/base: read VSI layer info from VSI

2024-08-12 Thread Bruce Richardson
Rather than computing the layer of the VSI from the number of HW layers,
we can instead just read that info from the VSI node itself. This allows
the layer to be changed at runtime.

Signed-off-by: Bruce Richardson 
---
 drivers/net/ice/base/ice_sched.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ice/base/ice_sched.c b/drivers/net/ice/base/ice_sched.c
index f6dc5ae173..e398984bf2 100644
--- a/drivers/net/ice/base/ice_sched.c
+++ b/drivers/net/ice/base/ice_sched.c
@@ -1559,7 +1559,6 @@ ice_sched_get_free_qparent(struct ice_port_info *pi, u16 
vsi_handle, u8 tc,
u16 max_children;
 
qgrp_layer = ice_sched_get_qgrp_layer(pi->hw);
-   vsi_layer = ice_sched_get_vsi_layer(pi->hw);
max_children = pi->hw->max_children[qgrp_layer];
 
vsi_ctx = ice_get_vsi_ctx(pi->hw, vsi_handle);
@@ -1569,6 +1568,7 @@ ice_sched_get_free_qparent(struct ice_port_info *pi, u16 
vsi_handle, u8 tc,
/* validate invalid VSI ID */
if (!vsi_node)
return NULL;
+   vsi_layer = vsi_node->tx_sched_layer;
 
/* If the queue group and vsi layer are same then queues
 * are all attached directly to VSI
-- 
2.43.0



[PATCH v3 09/16] net/ice/base: remove 255 limit on sched child nodes

2024-08-12 Thread Bruce Richardson
The Tx scheduler in the ice driver can be configured to have large
numbers of child nodes at a given layer, but the driver code implicitly
limited the number of nodes to 255 by using a u8 datatype for the number
of children. Increase this to a 16-bit value throughout the code.

Signed-off-by: Bruce Richardson 
---
 drivers/net/ice/base/ice_sched.c | 25 ++---
 drivers/net/ice/base/ice_type.h  |  2 +-
 2 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ice/base/ice_sched.c b/drivers/net/ice/base/ice_sched.c
index e398984bf2..be13833e1e 100644
--- a/drivers/net/ice/base/ice_sched.c
+++ b/drivers/net/ice/base/ice_sched.c
@@ -289,7 +289,7 @@ ice_sched_get_first_node(struct ice_port_info *pi,
  */
 struct ice_sched_node *ice_sched_get_tc_node(struct ice_port_info *pi, u8 tc)
 {
-   u8 i;
+   u16 i;
 
if (!pi || !pi->root)
return NULL;
@@ -316,7 +316,7 @@ void ice_free_sched_node(struct ice_port_info *pi, struct 
ice_sched_node *node)
 {
struct ice_sched_node *parent;
struct ice_hw *hw = pi->hw;
-   u8 i, j;
+   u16 i, j;
 
/* Free the children before freeing up the parent node
 * The parent array is updated below and that shifts the nodes
@@ -1473,7 +1473,7 @@ bool
 ice_sched_find_node_in_subtree(struct ice_hw *hw, struct ice_sched_node *base,
   struct ice_sched_node *node)
 {
-   u8 i;
+   u16 i;
 
for (i = 0; i < base->num_children; i++) {
struct ice_sched_node *child = base->children[i];
@@ -1510,7 +1510,7 @@ ice_sched_get_free_qgrp(struct ice_port_info *pi,
struct ice_sched_node *qgrp_node, u8 owner)
 {
struct ice_sched_node *min_qgrp;
-   u8 min_children;
+   u16 min_children;
 
if (!qgrp_node)
return qgrp_node;
@@ -2070,7 +2070,7 @@ static void ice_sched_rm_agg_vsi_info(struct 
ice_port_info *pi, u16 vsi_handle)
  */
 static bool ice_sched_is_leaf_node_present(struct ice_sched_node *node)
 {
-   u8 i;
+   u16 i;
 
for (i = 0; i < node->num_children; i++)
if (ice_sched_is_leaf_node_present(node->children[i]))
@@ -2105,7 +2105,7 @@ ice_sched_rm_vsi_cfg(struct ice_port_info *pi, u16 
vsi_handle, u8 owner)
 
ice_for_each_traffic_class(i) {
struct ice_sched_node *vsi_node, *tc_node;
-   u8 j = 0;
+   u16 j = 0;
 
tc_node = ice_sched_get_tc_node(pi, i);
if (!tc_node)
@@ -2173,7 +2173,7 @@ int ice_rm_vsi_lan_cfg(struct ice_port_info *pi, u16 
vsi_handle)
  */
 bool ice_sched_is_tree_balanced(struct ice_hw *hw, struct ice_sched_node *node)
 {
-   u8 i;
+   u16 i;
 
/* start from the leaf node */
for (i = 0; i < node->num_children; i++)
@@ -2247,7 +2247,8 @@ ice_sched_get_free_vsi_parent(struct ice_hw *hw, struct 
ice_sched_node *node,
  u16 *num_nodes)
 {
u8 l = node->tx_sched_layer;
-   u8 vsil, i;
+   u8 vsil;
+   u16 i;
 
vsil = ice_sched_get_vsi_layer(hw);
 
@@ -2289,7 +2290,7 @@ ice_sched_update_parent(struct ice_sched_node *new_parent,
struct ice_sched_node *node)
 {
struct ice_sched_node *old_parent;
-   u8 i, j;
+   u16 i, j;
 
old_parent = node->parent;
 
@@ -2389,7 +2390,8 @@ ice_sched_move_vsi_to_agg(struct ice_port_info *pi, u16 
vsi_handle, u32 agg_id,
u16 num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
u32 first_node_teid, vsi_teid;
u16 num_nodes_added;
-   u8 aggl, vsil, i;
+   u8 aggl, vsil;
+   u16 i;
int status;
 
tc_node = ice_sched_get_tc_node(pi, tc);
@@ -2505,7 +2507,8 @@ ice_move_all_vsi_to_dflt_agg(struct ice_port_info *pi,
 static bool
 ice_sched_is_agg_inuse(struct ice_port_info *pi, struct ice_sched_node *node)
 {
-   u8 vsil, i;
+   u8 vsil;
+   u16 i;
 
vsil = ice_sched_get_vsi_layer(pi->hw);
if (node->tx_sched_layer < vsil - 1) {
diff --git a/drivers/net/ice/base/ice_type.h b/drivers/net/ice/base/ice_type.h
index a70e4a8afa..35f832eb9f 100644
--- a/drivers/net/ice/base/ice_type.h
+++ b/drivers/net/ice/base/ice_type.h
@@ -1030,9 +1030,9 @@ struct ice_sched_node {
struct ice_aqc_txsched_elem_data info;
u32 agg_id; /* aggregator group ID */
u16 vsi_handle;
+   u16 num_children;
u8 in_use;  /* suspended or in use */
u8 tx_sched_layer;  /* Logical Layer (1-9) */
-   u8 num_children;
u8 tc_num;
u8 owner;
 #define ICE_SCHED_NODE_OWNER_LAN   0
-- 
2.43.0



[PATCH v3 11/16] net/ice/base: make functions non-static

2024-08-12 Thread Bruce Richardson
We will need to allocate more LAN queue contexts after a scheduler rework, so
make that function non-static so it is accessible outside the file. For similar
reasons, make the function to add a Tx scheduler node non-static.

Signed-off-by: Bruce Richardson 
---
 drivers/net/ice/base/ice_sched.c | 2 +-
 drivers/net/ice/base/ice_sched.h | 8 
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ice/base/ice_sched.c b/drivers/net/ice/base/ice_sched.c
index f7d5f8f415..d88b836c38 100644
--- a/drivers/net/ice/base/ice_sched.c
+++ b/drivers/net/ice/base/ice_sched.c
@@ -570,7 +570,7 @@ ice_sched_suspend_resume_elems(struct ice_hw *hw, u8 
num_nodes, u32 *node_teids,
  * @tc: TC number
  * @new_numqs: number of queues
  */
-static int
+int
 ice_alloc_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 new_numqs)
 {
struct ice_vsi_ctx *vsi_ctx;
diff --git a/drivers/net/ice/base/ice_sched.h b/drivers/net/ice/base/ice_sched.h
index 9f78516dfb..c7eb794963 100644
--- a/drivers/net/ice/base/ice_sched.h
+++ b/drivers/net/ice/base/ice_sched.h
@@ -270,4 +270,12 @@ int ice_sched_replay_q_bw(struct ice_port_info *pi, struct 
ice_q_ctx *q_ctx);
 int
 ice_sched_cfg_node_bw_alloc(struct ice_hw *hw, struct ice_sched_node *node,
enum ice_rl_type rl_type, u16 bw_alloc);
+
+int
+ice_sched_add_elems(struct ice_port_info *pi, struct ice_sched_node *tc_node,
+   struct ice_sched_node *parent, u8 layer, u16 num_nodes,
+   u16 *num_nodes_added, u32 *first_node_teid,
+   struct ice_sched_node **prealloc_nodes);
+int
+ice_alloc_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 new_numqs);
 #endif /* _ICE_SCHED_H_ */
-- 
2.43.0



[PATCH v3 10/16] net/ice/base: optimize subtree searches

2024-08-12 Thread Bruce Richardson
In a number of places throughout the driver code, we want to confirm
that a scheduler node is indeed a child of another node. Currently, this
is confirmed by searching down the tree from the base until the desired
node is hit, a search which may hit many irrelevant tree nodes when
recursing down wrong branches. By switching the direction of search, to
check upwards from the node to the parent, we can avoid any incorrect
paths, and so speed up processing.

Signed-off-by: Bruce Richardson 
---
 drivers/net/ice/base/ice_sched.c | 18 +-
 1 file changed, 5 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ice/base/ice_sched.c b/drivers/net/ice/base/ice_sched.c
index be13833e1e..f7d5f8f415 100644
--- a/drivers/net/ice/base/ice_sched.c
+++ b/drivers/net/ice/base/ice_sched.c
@@ -1475,20 +1475,12 @@ ice_sched_find_node_in_subtree(struct ice_hw *hw, 
struct ice_sched_node *base,
 {
u16 i;
 
-   for (i = 0; i < base->num_children; i++) {
-   struct ice_sched_node *child = base->children[i];
-
-   if (node == child)
-   return true;
-
-   if (child->tx_sched_layer > node->tx_sched_layer)
-   return false;
-
-   /* this recursion is intentional, and wouldn't
-* go more than 8 calls
-*/
-   if (ice_sched_find_node_in_subtree(hw, child, node))
+   if (base == node)
+   return true;
+   while (node->tx_sched_layer != 0 && node->parent != NULL) {
+   if (node->parent == base)
return true;
+   node = node->parent;
}
return false;
 }
-- 
2.43.0



[PATCH v3 13/16] net/ice: limit the number of queues to sched capabilities

2024-08-12 Thread Bruce Richardson
Rather than assuming that each VSI can hold up to 256 queue pairs,
or the reported device limit, query the available nodes in the scheduler
tree to check that we are not overflowing the limit for number of
child scheduling nodes at each level. Do this by multiplying
max_children for each level beyond the VSI and using that as an
additional cap on the number of queues.

Signed-off-by: Bruce Richardson 
---
 drivers/net/ice/ice_ethdev.c | 25 -
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c
index 0d2445a317..ab3f88fd7d 100644
--- a/drivers/net/ice/ice_ethdev.c
+++ b/drivers/net/ice/ice_ethdev.c
@@ -913,7 +913,7 @@ ice_vsi_config_default_rss(struct ice_aqc_vsi_props *info)
 }
 
 static int
-ice_vsi_config_tc_queue_mapping(struct ice_vsi *vsi,
+ice_vsi_config_tc_queue_mapping(struct ice_hw *hw, struct ice_vsi *vsi,
struct ice_aqc_vsi_props *info,
uint8_t enabled_tcmap)
 {
@@ -929,13 +929,28 @@ ice_vsi_config_tc_queue_mapping(struct ice_vsi *vsi,
}
 
/* vector 0 is reserved and 1 vector for ctrl vsi */
-   if (vsi->adapter->hw.func_caps.common_cap.num_msix_vectors < 2)
+   if (vsi->adapter->hw.func_caps.common_cap.num_msix_vectors < 2) {
vsi->nb_qps = 0;
-   else
+   } else {
vsi->nb_qps = RTE_MIN

((uint16_t)vsi->adapter->hw.func_caps.common_cap.num_msix_vectors - 2,
RTE_MIN(vsi->nb_qps, ICE_MAX_Q_PER_TC));
 
+   /* cap max QPs to what the HW reports as num-children for each 
layer.
+* Multiply num_children for each layer from the entry_point 
layer to
+* the qgroup, or second-last layer.
+* Avoid any potential overflow by using uint32_t type and 
breaking loop
+* once we have a number greater than the already configured 
max.
+*/
+   uint32_t max_sched_vsi_nodes = 1;
+   for (uint8_t i = hw->sw_entry_point_layer; i < 
hw->num_tx_sched_layers - 1; i++) {
+   max_sched_vsi_nodes *= hw->max_children[i];
+   if (max_sched_vsi_nodes >= vsi->nb_qps)
+   break;
+   }
+   vsi->nb_qps = RTE_MIN(vsi->nb_qps, max_sched_vsi_nodes);
+   }
+
/* nb_qps(hex)  -> fls */
/*  -> 0 */
/* 0001 -> 0 */
@@ -1707,7 +1722,7 @@ ice_setup_vsi(struct ice_pf *pf, enum ice_vsi_type type)
rte_cpu_to_le_16(hw->func_caps.fd_fltr_best_effort);
 
/* Enable VLAN/UP trip */
-   ret = ice_vsi_config_tc_queue_mapping(vsi,
+   ret = ice_vsi_config_tc_queue_mapping(hw, vsi,
  &vsi_ctx.info,
  ICE_DEFAULT_TCMAP);
if (ret) {
@@ -1731,7 +1746,7 @@ ice_setup_vsi(struct ice_pf *pf, enum ice_vsi_type type)
vsi_ctx.info.fd_options = rte_cpu_to_le_16(cfg);
vsi_ctx.info.sw_id = hw->port_info->sw_id;
vsi_ctx.info.sw_flags2 = ICE_AQ_VSI_SW_FLAG_LAN_ENA;
-   ret = ice_vsi_config_tc_queue_mapping(vsi,
+   ret = ice_vsi_config_tc_queue_mapping(hw, vsi,
  &vsi_ctx.info,
  ICE_DEFAULT_TCMAP);
if (ret) {
-- 
2.43.0
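
As a worked example of the new cap: if, say, the scheduler reports
max_children of 8 at each of two layers between the entry point and the
queue-group layer, the VSI is limited to 8 * 8 = 64 queue pairs, regardless
of any larger device-level queue count reported elsewhere.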



[PATCH v3 12/16] net/ice/base: remove flag checks before topology upload

2024-08-12 Thread Bruce Richardson
DPDK should support more than just 9-level or 5-level topologies, so
remove the checks for those particular settings.

Signed-off-by: Bruce Richardson 
---
 drivers/net/ice/base/ice_ddp.c | 33 -
 1 file changed, 33 deletions(-)

diff --git a/drivers/net/ice/base/ice_ddp.c b/drivers/net/ice/base/ice_ddp.c
index e6c42c5274..744f015fe5 100644
--- a/drivers/net/ice/base/ice_ddp.c
+++ b/drivers/net/ice/base/ice_ddp.c
@@ -2373,38 +2373,6 @@ int ice_cfg_tx_topo(struct ice_hw *hw, u8 *buf, u32 len)
return status;
}
 
-   /* Is default topology already applied ? */
-   if (!(flags & ICE_AQC_TX_TOPO_FLAGS_LOAD_NEW) &&
-   hw->num_tx_sched_layers == 9) {
-   ice_debug(hw, ICE_DBG_INIT, "Loaded default topology\n");
-   /* Already default topology is loaded */
-   return ICE_ERR_ALREADY_EXISTS;
-   }
-
-   /* Is new topology already applied ? */
-   if ((flags & ICE_AQC_TX_TOPO_FLAGS_LOAD_NEW) &&
-   hw->num_tx_sched_layers == 5) {
-   ice_debug(hw, ICE_DBG_INIT, "Loaded new topology\n");
-   /* Already new topology is loaded */
-   return ICE_ERR_ALREADY_EXISTS;
-   }
-
-   /* Is set topology issued already ? */
-   if (flags & ICE_AQC_TX_TOPO_FLAGS_ISSUED) {
-   ice_debug(hw, ICE_DBG_INIT, "Update tx topology was done by 
another PF\n");
-   /* add a small delay before exiting */
-   for (i = 0; i < 20; i++)
-   ice_msec_delay(100, true);
-   return ICE_ERR_ALREADY_EXISTS;
-   }
-
-   /* Change the topology from new to default (5 to 9) */
-   if (!(flags & ICE_AQC_TX_TOPO_FLAGS_LOAD_NEW) &&
-   hw->num_tx_sched_layers == 5) {
-   ice_debug(hw, ICE_DBG_INIT, "Change topology from 5 to 9 
layers\n");
-   goto update_topo;
-   }
-
pkg_hdr = (struct ice_pkg_hdr *)buf;
state = ice_verify_pkg(pkg_hdr, len);
if (state) {
@@ -2451,7 +2419,6 @@ int ice_cfg_tx_topo(struct ice_hw *hw, u8 *buf, u32 len)
/* Get the new topology buffer */
new_topo = ((u8 *)section) + offset;
 
-update_topo:
/* acquire global lock to make sure that set topology issued
 * by one PF
 */
-- 
2.43.0



[PATCH v3 14/16] net/ice: enhance Tx scheduler hierarchy support

2024-08-12 Thread Bruce Richardson
Increase the flexibility of the Tx scheduler hierarchy support in the
driver. If the HW/firmware allows it, allow creating up to 2k child
nodes per scheduler node. Also expand the number of supported layers to
the max available, rather than always just having 3 layers.  One
restriction on this change is that the topology needs to be configured
and enabled before port queue setup, in many cases, and before port
start in all cases.

Signed-off-by: Bruce Richardson 
---
 drivers/net/ice/ice_ethdev.c |   9 -
 drivers/net/ice/ice_ethdev.h |  15 +-
 drivers/net/ice/ice_rxtx.c   |  10 +
 drivers/net/ice/ice_tm.c | 500 ++-
 4 files changed, 216 insertions(+), 318 deletions(-)

diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c
index ab3f88fd7d..5a5967ff71 100644
--- a/drivers/net/ice/ice_ethdev.c
+++ b/drivers/net/ice/ice_ethdev.c
@@ -3832,7 +3832,6 @@ ice_dev_start(struct rte_eth_dev *dev)
int mask, ret;
uint8_t timer = hw->func_caps.ts_func_info.tmr_index_owned;
uint32_t pin_idx = ad->devargs.pin_idx;
-   struct rte_tm_error tm_err;
ice_declare_bitmap(pmask, ICE_PROMISC_MAX);
ice_zero_bitmap(pmask, ICE_PROMISC_MAX);
 
@@ -3864,14 +3863,6 @@ ice_dev_start(struct rte_eth_dev *dev)
}
}
 
-   if (pf->tm_conf.committed) {
-   ret = ice_do_hierarchy_commit(dev, pf->tm_conf.clear_on_fail, 
&tm_err);
-   if (ret) {
-   PMD_DRV_LOG(ERR, "fail to commit Tx scheduler");
-   goto rx_err;
-   }
-   }
-
ice_set_rx_function(dev);
ice_set_tx_function(dev);
 
diff --git a/drivers/net/ice/ice_ethdev.h b/drivers/net/ice/ice_ethdev.h
index f31addb122..cb1a7e8e0d 100644
--- a/drivers/net/ice/ice_ethdev.h
+++ b/drivers/net/ice/ice_ethdev.h
@@ -479,14 +479,6 @@ struct ice_tm_node {
struct ice_sched_node *sched_node;
 };
 
-/* node type of Traffic Manager */
-enum ice_tm_node_type {
-   ICE_TM_NODE_TYPE_PORT,
-   ICE_TM_NODE_TYPE_QGROUP,
-   ICE_TM_NODE_TYPE_QUEUE,
-   ICE_TM_NODE_TYPE_MAX,
-};
-
 /* Struct to store all the Traffic Manager configuration. */
 struct ice_tm_conf {
struct ice_shaper_profile_list shaper_profile_list;
@@ -690,9 +682,6 @@ int ice_rem_rss_cfg_wrap(struct ice_pf *pf, uint16_t vsi_id,
 struct ice_rss_hash_cfg *cfg);
 void ice_tm_conf_init(struct rte_eth_dev *dev);
 void ice_tm_conf_uninit(struct rte_eth_dev *dev);
-int ice_do_hierarchy_commit(struct rte_eth_dev *dev,
-   int clear_on_fail,
-   struct rte_tm_error *error);
 extern const struct rte_tm_ops ice_tm_ops;
 
 static inline int
@@ -750,4 +739,8 @@ int rte_pmd_ice_dump_switch(uint16_t port, uint8_t **buff, 
uint32_t *size);
 
 __rte_experimental
 int rte_pmd_ice_dump_txsched(uint16_t port, bool detail, FILE *stream);
+
+int
+ice_tm_setup_txq_node(struct ice_pf *pf, struct ice_hw *hw, uint16_t qid, 
uint32_t node_teid);
+
 #endif /* _ICE_ETHDEV_H_ */
diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c
index a150d28e73..7a421bb364 100644
--- a/drivers/net/ice/ice_rxtx.c
+++ b/drivers/net/ice/ice_rxtx.c
@@ -747,6 +747,7 @@ ice_tx_queue_start(struct rte_eth_dev *dev, uint16_t 
tx_queue_id)
int err;
struct ice_vsi *vsi;
struct ice_hw *hw;
+   struct ice_pf *pf;
struct ice_aqc_add_tx_qgrp *txq_elem;
struct ice_tlan_ctx tx_ctx;
int buf_len;
@@ -777,6 +778,7 @@ ice_tx_queue_start(struct rte_eth_dev *dev, uint16_t 
tx_queue_id)
 
vsi = txq->vsi;
hw = ICE_VSI_TO_HW(vsi);
+   pf = ICE_VSI_TO_PF(vsi);
 
memset(&tx_ctx, 0, sizeof(tx_ctx));
txq_elem->num_txqs = 1;
@@ -812,6 +814,14 @@ ice_tx_queue_start(struct rte_eth_dev *dev, uint16_t 
tx_queue_id)
/* store the schedule node id */
txq->q_teid = txq_elem->txqs[0].q_teid;
 
+   /* move the queue to correct position in hierarchy, if explicit 
hierarchy configured */
+   if (pf->tm_conf.committed)
+   if (ice_tm_setup_txq_node(pf, hw, tx_queue_id, txq->q_teid) != 
0) {
+   PMD_DRV_LOG(ERR, "Failed to set up txq traffic 
management node");
+   rte_free(txq_elem);
+   return -EIO;
+   }
+
dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
 
rte_free(txq_elem);
diff --git a/drivers/net/ice/ice_tm.c b/drivers/net/ice/ice_tm.c
index 459446a6b0..80039c8aff 100644
--- a/drivers/net/ice/ice_tm.c
+++ b/drivers/net/ice/ice_tm.c
@@ -1,17 +1,17 @@
 /* SPDX-License-Identifier: BSD-3-Clause
  * Copyright(c) 2022 Intel Corporation
  */
+#include 
 #include 
 
 #include "ice_ethdev.h"
 #include "ice_rxtx.h"
 
-#define MAX_CHILDREN_PER_SCHED_NODE8
-#define MAX_CHILDREN_PER_TM_NODE   256
+#define MAX_CHILDREN_PER_TM_NODE   2048
 
 static int ice_hierarchy_com

[PATCH v3 15/16] net/ice: add minimal capability reporting API

2024-08-12 Thread Bruce Richardson
Incomplete, but it reports the number of available layers.

Signed-off-by: Bruce Richardson 
---
 drivers/net/ice/ice_ethdev.h |  1 +
 drivers/net/ice/ice_tm.c | 17 +
 2 files changed, 18 insertions(+)

diff --git a/drivers/net/ice/ice_ethdev.h b/drivers/net/ice/ice_ethdev.h
index cb1a7e8e0d..6bebc511e4 100644
--- a/drivers/net/ice/ice_ethdev.h
+++ b/drivers/net/ice/ice_ethdev.h
@@ -682,6 +682,7 @@ int ice_rem_rss_cfg_wrap(struct ice_pf *pf, uint16_t vsi_id,
 struct ice_rss_hash_cfg *cfg);
 void ice_tm_conf_init(struct rte_eth_dev *dev);
 void ice_tm_conf_uninit(struct rte_eth_dev *dev);
+
 extern const struct rte_tm_ops ice_tm_ops;
 
 static inline int
diff --git a/drivers/net/ice/ice_tm.c b/drivers/net/ice/ice_tm.c
index 80039c8aff..3dcd091c38 100644
--- a/drivers/net/ice/ice_tm.c
+++ b/drivers/net/ice/ice_tm.c
@@ -33,8 +33,12 @@ static int ice_shaper_profile_add(struct rte_eth_dev *dev,
 static int ice_shaper_profile_del(struct rte_eth_dev *dev,
   uint32_t shaper_profile_id,
   struct rte_tm_error *error);
+static int ice_tm_capabilities_get(struct rte_eth_dev *dev,
+   struct rte_tm_capabilities *cap,
+   struct rte_tm_error *error);
 
 const struct rte_tm_ops ice_tm_ops = {
+   .capabilities_get = ice_tm_capabilities_get,
.shaper_profile_add = ice_shaper_profile_add,
.shaper_profile_delete = ice_shaper_profile_del,
.node_add = ice_tm_node_add,
@@ -864,3 +868,16 @@ ice_hierarchy_commit(struct rte_eth_dev *dev,
PMD_DRV_LOG(DEBUG, "Time to apply hierarchy = %.1f\n", (float)time / 
rte_get_timer_hz());
return ret;
 }
+
+static int
+ice_tm_capabilities_get(struct rte_eth_dev *dev, struct rte_tm_capabilities 
*cap,
+   struct rte_tm_error *error)
+{
+   struct ice_hw *hw = ICE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+   *cap = (struct rte_tm_capabilities){
+   .n_levels_max = hw->num_tx_sched_layers - hw->port_info->has_tc,
+   };
+   if (error)
+   error->type = RTE_TM_ERROR_TYPE_NONE;
+   return 0;
+}
-- 
2.43.0
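
For context, an application reads this through the generic rte_tm API; a
minimal usage sketch (headers and error handling mostly elided):

    struct rte_tm_capabilities cap;
    struct rte_tm_error tm_err;

    memset(&cap, 0, sizeof(cap));
    if (rte_tm_capabilities_get(port_id, &cap, &tm_err) == 0)
        printf("TM levels available: %u\n", cap.n_levels_max);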



[PATCH v3 16/16] net/ice: do early check on node level when adding

2024-08-12 Thread Bruce Richardson
When adding a new scheduler node, the parameters for leaf nodes and
non-leaf nodes are different, and which parameter checks are done is
determined by checking the node level i.e. if it's the lowest (leaf)
node level or not. However, if the node level itself is incorrectly
specified, the resulting error messages can be confusing, since the user may
add a leaf node using e.g. the testpmd command to explicitly add a leaf
node, yet get error messages only relevant to non-leaf nodes due to an
incorrect level parameter.

We can avoid these confusing errors by doing a check that the level
matches "parent->level + 1" before doing a more detailed parameter
check.

Signed-off-by: Bruce Richardson 
---
 drivers/net/ice/ice_tm.c | 13 +++--
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ice/ice_tm.c b/drivers/net/ice/ice_tm.c
index 3dcd091c38..e05ad8a8e7 100644
--- a/drivers/net/ice/ice_tm.c
+++ b/drivers/net/ice/ice_tm.c
@@ -426,6 +426,13 @@ ice_tm_node_add(struct rte_eth_dev *dev, uint32_t node_id,
if (level_id == RTE_TM_NODE_LEVEL_ID_ANY && parent_node != NULL)
level_id = parent_node->level + 1;
 
+   /* check level */
+   if (parent_node != NULL && level_id != parent_node->level + 1) {
+   error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+   error->message = "Wrong level";
+   return -EINVAL;
+   }
+
ret = ice_node_param_check(node_id, priority, weight,
params, level_id == ice_get_leaf_level(hw), error);
if (ret)
@@ -493,12 +500,6 @@ ice_tm_node_add(struct rte_eth_dev *dev, uint32_t node_id,
error->message = "parent is not valid";
return -EINVAL;
}
-   /* check level */
-   if (level_id != parent_node->level + 1) {
-   error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
-   error->message = "Wrong level";
-   return -EINVAL;
-   }
 
/* check the max children allowed at this level */
if (parent_node->reference_count >= 
hw->max_children[parent_node->level]) {
-- 
2.43.0



Re: [PATCH v1] dts: add verify argument to set forward mode

2024-08-12 Thread Jeremy Spewock
On Mon, Aug 12, 2024 at 10:23 AM Dean Marx  wrote:
>
> Add optional verify argument to the set_forward_mode
> method in testpmd shell.
>
> Signed-off-by: Dean Marx 

The patch looks good to me, but it might be beneficial to add a
"Fixes" tag as well as a "Bugzilla ID" tag to the commit body. The
DPDK contribution guidelines explain how to format the tags [1] and
also which order they should appear in [2].

[1] 
https://doc.dpdk.org/guides/contributing/patches.html#patch-fix-related-issues
[2] https://doc.dpdk.org/guides/contributing/patches.html#tag-order

> 2.44.0
>
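
For reference, the suggested tags would sit just above the Signed-off-by
line, in the order the guidelines describe; the bug number and commit hash
below are illustrative placeholders only:

    Bugzilla ID: 1234
    Fixes: 0123456789ab ("dts: original commit subject")

    Signed-off-by: Dean Marx 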


[PATCH] net/ena: revert redefining memcpy

2024-08-12 Thread Stephen Hemminger
Redefining memcpy as rte_memcpy has no performance gain on
current compilers, and introduced bugs like this one where
rte_memcpy() will be detected as referencing past the destination.

Bugzilla ID: 1510
Fixes: 142778b3702a ("net/ena: switch memcpy to optimized version")

Signed-off-by: Stephen Hemminger 
---
 drivers/net/ena/base/ena_plat_dpdk.h | 9 +
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/drivers/net/ena/base/ena_plat_dpdk.h 
b/drivers/net/ena/base/ena_plat_dpdk.h
index 21b96113c7..291db6cabe 100644
--- a/drivers/net/ena/base/ena_plat_dpdk.h
+++ b/drivers/net/ena/base/ena_plat_dpdk.h
@@ -26,7 +26,6 @@
 #include 
 
 #include 
-#include 
 
 typedef uint64_t u64;
 typedef uint32_t u32;
@@ -70,13 +69,7 @@ typedef uint64_t dma_addr_t;
 #define ENA_UDELAY(x) rte_delay_us_block(x)
 
 #define ENA_TOUCH(x) ((void)(x))
-/* Redefine memcpy with caution: rte_memcpy can be simply aliased to memcpy, so
- * make the redefinition only if it's safe (and beneficial) to do so.
- */
-#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64_MEMCPY) || 
defined(RTE_ARCH_ARM_NEON_MEMCPY)
-#undef memcpy
-#define memcpy rte_memcpy
-#endif
+
 #define wmb rte_wmb
 #define rmb rte_rmb
 #define mb rte_mb
-- 
2.43.0



[PATCH v1] dts: add package mode config and updated README

2024-08-12 Thread Dean Marx
In the current DTS setup description, the user runs poetry install
with the --no-root option. However, adding 'package-mode = false'
to the pyproject.toml sets the same configuration, and running
poetry install --no-root will become an error in a future
poetry version.

Signed-off-by: Dean Marx 
---
 dts/README.md  | 4 ++--
 dts/pyproject.toml | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/dts/README.md b/dts/README.md
index ee3fa1c968..2b3a7f89c5 100644
--- a/dts/README.md
+++ b/dts/README.md
@@ -37,7 +37,7 @@ to allow you to connect to hosts without specifying a 
password.
 ```shell
 docker build --target dev -t dpdk-dts .
 docker run -v $(pwd)/..:/dpdk -v /home/dtsuser/.ssh:/root/.ssh:ro -it dpdk-dts 
bash
-$ poetry install --no-root
+$ poetry install
 $ poetry shell
 ```
 
@@ -46,7 +46,7 @@ $ poetry shell
 ```shell
 docker build --target dev -t dpdk-dts .
 docker run -v $(pwd)/..:/dpdk -it dpdk-dts bash
-$ poetry install --no-root
+$ poetry install
 $ poetry shell
 ```
 
diff --git a/dts/pyproject.toml b/dts/pyproject.toml
index 0b9b09805a..712af2f01d 100644
--- a/dts/pyproject.toml
+++ b/dts/pyproject.toml
@@ -3,6 +3,7 @@
 # Copyright(c) 2023 PANTHEON.tech s.r.o.
 
 [tool.poetry]
+package-mode = false
 name = "dts"
 version = "0.1.0"
 description = "DPDK Test Suite."
-- 
2.44.0



[PATCH 0/1] dts: add driver binding on TG

2024-08-12 Thread jspewock
From: Jeremy Spewock 

Previously in DTS there was support for binding the ports of a node to
different drivers on the SUT, but there was no option on the TG. Since
there are likely to be some traffic generators in the future that would
require different drivers to operate properly, this support is something
that would likely be useful to have, and it is very simple to add. All that
is done in this patch is moving the functionality for copying the DPDK
tarball onto a host out of the SUT node and into the generic Node class
so that both the TG and the SUT can take advantage of DPDK tools. It
should be noted, however, that the TG node still does not build DPDK, as it
likely wouldn't need the compiled binaries.

Jeremy Spewock (1):
  dts: add binding to different drivers to TG node

 dts/framework/runner.py |   2 +
 dts/framework/testbed_model/node.py | 106 +++-
 dts/framework/testbed_model/sut_node.py |  86 +--
 3 files changed, 109 insertions(+), 85 deletions(-)

-- 
2.45.2



[PATCH 1/1] dts: add binding to different drivers to TG node

2024-08-12 Thread jspewock
From: Jeremy Spewock 

The DTS framework in its current state supports binding ports to
different drivers on the SUT node but not the TG node. The TG node
already has the information that it needs about the different drivers
that it has available in the configuration file, but it did not
previously have access to the devbind script, so it did not use that
information for anything.

This patch moves the steps for copying the DPDK tarball into the Node class
rather than the SUT node class, and calls this function on the TG node
as well as the SUT. It also moves the driver binding step into the Node
class and triggers the same pattern of binding to ports that existed on
the SUT on the TG.

Bugzilla ID: 1420

Signed-off-by: Jeremy Spewock 
---
 dts/framework/runner.py |   2 +
 dts/framework/testbed_model/node.py | 106 +++-
 dts/framework/testbed_model/sut_node.py |  86 +--
 3 files changed, 109 insertions(+), 85 deletions(-)

diff --git a/dts/framework/runner.py b/dts/framework/runner.py
index 6b6f6a05f5..ed9e58b172 100644
--- a/dts/framework/runner.py
+++ b/dts/framework/runner.py
@@ -484,6 +484,7 @@ def _run_build_target(
 
 try:
 sut_node.set_up_build_target(build_target_config)
+tg_node.set_up_build_target(build_target_config)
 self._result.dpdk_version = sut_node.dpdk_version
 
build_target_result.add_build_target_info(sut_node.get_build_target_info())
 build_target_result.update_setup(Result.PASS)
@@ -498,6 +499,7 @@ def _run_build_target(
 try:
 self._logger.set_stage(DtsStage.build_target_teardown)
 sut_node.tear_down_build_target()
+tg_node.tear_down_build_target()
 build_target_result.update_teardown(Result.PASS)
 except Exception as e:
 self._logger.exception("Build target teardown failed.")
diff --git a/dts/framework/testbed_model/node.py 
b/dts/framework/testbed_model/node.py
index 12a40170ac..8e6181e424 100644
--- a/dts/framework/testbed_model/node.py
+++ b/dts/framework/testbed_model/node.py
@@ -13,11 +13,19 @@
 The :func:`~Node.skip_setup` decorator can be used without subclassing.
 """
 
+import os
+import tarfile
 from abc import ABC
 from ipaddress import IPv4Interface, IPv6Interface
+from pathlib import PurePath
 from typing import Any, Callable, Union
 
-from framework.config import OS, NodeConfiguration, TestRunConfiguration
+from framework.config import (
+OS,
+BuildTargetConfiguration,
+NodeConfiguration,
+TestRunConfiguration,
+)
 from framework.exception import ConfigurationError
 from framework.logger import DTSLogger, get_dts_logger
 from framework.settings import SETTINGS
@@ -58,8 +66,11 @@ class Node(ABC):
 lcores: list[LogicalCore]
 ports: list[Port]
 _logger: DTSLogger
+_remote_tmp_dir: PurePath
+__remote_dpdk_dir: PurePath | None
 _other_sessions: list[OSSession]
 _test_run_config: TestRunConfiguration
+_path_to_devbind_script: PurePath | None
 
 def __init__(self, node_config: NodeConfiguration):
 """Connect to the node and gather info during initialization.
@@ -88,6 +99,9 @@ def __init__(self, node_config: NodeConfiguration):
 
 self._other_sessions = []
 self._init_ports()
+self._remote_tmp_dir = self.main_session.get_remote_tmp_dir()
+self.__remote_dpdk_dir = None
+self._path_to_devbind_script = None
 
 def _init_ports(self) -> None:
 self.ports = [Port(self.name, port_config) for port_config in 
self.config.ports]
@@ -95,6 +109,34 @@ def _init_ports(self) -> None:
 for port in self.ports:
 self.configure_port_state(port)
 
+def _guess_dpdk_remote_dir(self) -> PurePath:
+return self.main_session.guess_dpdk_remote_dir(self._remote_tmp_dir)
+
+@property
+def _remote_dpdk_dir(self) -> PurePath:
+"""The remote DPDK dir.
+
+This internal property should be set after extracting the DPDK 
tarball. If it's not set,
+that implies the DPDK setup step has been skipped, in which case we 
can guess where
+a previous build was located.
+"""
+if self.__remote_dpdk_dir is None:
+self.__remote_dpdk_dir = self._guess_dpdk_remote_dir()
+return self.__remote_dpdk_dir
+
+@_remote_dpdk_dir.setter
+def _remote_dpdk_dir(self, value: PurePath) -> None:
+self.__remote_dpdk_dir = value
+
+@property
+def path_to_devbind_script(self) -> PurePath:
+"""The path to the dpdk-devbind.py script on the node."""
+if self._path_to_devbind_script is None:
+self._path_to_devbind_script = self.main_session.join_remote_path(
+self._remote_dpdk_dir, "usertools", "dpdk-devbind.py"
+)
+return self._path_to_devbind_script
+
 def set_up_test_run(self, test_run_config: TestRunConfiguration) -> N

RE: [PATCH v1 1/3] bbdev: new queue stat for available enqueue depth

2024-08-12 Thread Chautru, Nicolas
Hi Maxime, 

The branch origin/next-baseband-for-main doesn’t yet have the updates from
main, such as the ./doc/rel_notes for 24.11.
Can you refresh it? Or let me know how best to proceed.
Thanks, 
Nic

> -Original Message-
> From: Maxime Coquelin 
> Sent: Monday, August 12, 2024 2:29 AM
> To: Chautru, Nicolas ; dev@dpdk.org
> Cc: hemant.agra...@nxp.com; Marchand, David
> ; Vargas, Hernan
> 
> Subject: Re: [PATCH v1 1/3] bbdev: new queue stat for available enqueue
> depth
> 
> Hi Nicolas,
> 
> On 4/4/24 23:04, Nicolas Chautru wrote:
> > Capturing additional queue stats counter for the depth of enqueue
> > batch still available on the given queue. This can help application to
> > monitor that depth at run time.
> >
> > Signed-off-by: Nicolas Chautru 
> > ---
> >   lib/bbdev/rte_bbdev.h | 2 ++
> >   1 file changed, 2 insertions(+)
> >
> > diff --git a/lib/bbdev/rte_bbdev.h b/lib/bbdev/rte_bbdev.h index
> > 0cbfdd1c95..25514c58ac 100644
> > --- a/lib/bbdev/rte_bbdev.h
> > +++ b/lib/bbdev/rte_bbdev.h
> > @@ -283,6 +283,8 @@ struct rte_bbdev_stats {
> >  * bbdev operation
> >  */
> > uint64_t acc_offload_cycles;
> > +   /** Available number of enqueue batch on that queue. */
> > +   uint16_t enqueue_depth_avail;
> >   };
> >
> >   /**
> 
> I think it needs to be documented in the ABI change section.
> 
> With that done, feel free to add:
> 
> Maxime Coquelin 
> 
> Thanks,
> Maxime



Re: [PATCH v3 1/1] dts: add text parser for testpmd verbose output

2024-08-12 Thread Nicholas Pratte
Great work!

I think it'd be fine to just keep it, if you ask me; I could see this
being used in the future. I'm also looking at it from the perspective
of 'what if I had to write this myself' if it turns out that we need it
again for something later. It's easy to remove later if it turns out we
aren't using it, but it'd likely be more time-consuming to remove it
now and implement it again later, considering that time has already
been spent building and testing it.

On Thu, Aug 8, 2024 at 5:49 PM Jeremy Spewock  wrote:
>
> On Thu, Aug 8, 2024 at 4:36 PM  wrote:
> >
> > From: Jeremy Spewock 
> >
> > Multiple test suites from the old DTS framework rely on being able to
> > consume and interpret the verbose output of testpmd. The new framework
> > doesn't have an elegant way for handling the verbose output, but test
> > suites are starting to be written that rely on it. This patch creates a
> > TextParser class that can be used to extract the verbose information
> > from any testpmd output and also adjusts the `stop` method of the shell
> > to return all output that it collected.
> >
> > Signed-off-by: Jeremy Spewock 
> > ---
> >  dts/framework/parser.py   |  30 ++
> >  dts/framework/remote_session/testpmd_shell.py | 405 +-
> >  dts/framework/utils.py|   1 +
> >  3 files changed, 434 insertions(+), 2 deletions(-)
> >
> > diff --git a/dts/framework/parser.py b/dts/framework/parser.py
> > index 741dfff821..0b39025a48 100644
> > --- a/dts/framework/parser.py
> > +++ b/dts/framework/parser.py
> > @@ -160,6 +160,36 @@ def _find(text: str) -> Any:
> >
> >  return ParserFn(TextParser_fn=_find)
> >
> > +@staticmethod
> > +def find_all(
>
> I just realized that I forgot to take this method out since it is no
> longer used in this patch now that I removed the idea of a block of
> output that contains a burst of packets. The method could still be
> useful in the future, but it isn't used anywhere now, so I can remove
> it in the next version if no one sees a use for it. I'm also open to
> leaving it there for the "just in case" it is needed in the future.
> What do you all think?
>
> > +pattern: str | re.Pattern[str],
> > +flags: re.RegexFlag = re.RegexFlag(0),
> > +) -> ParserFn:
> > +"""Makes a parser function that finds all of the regular 
> > expression matches in the text.
> > +
> > +If there are no matches found in the text then None will be 
> > returned, otherwise a list
> > +containing all matches will be returned. Patterns that contain 
> > multiple groups will pack
> > +the matches for each group into a tuple.
> > +
> > +Args:
> > +pattern: The regular expression pattern.
> > +flags: The regular expression flags. Ignored if the given 
> > pattern is already compiled.
> > +
> > +Returns:
> > +A :class:`ParserFn` that can be used as metadata for a 
> > dataclass field.
> > +"""
> > +if isinstance(pattern, str):
> > +pattern = re.compile(pattern, flags)
> > +
> > +def _find_all(text: str) -> list[str] | None:
> > +m = pattern.findall(text)
> > +if len(m) == 0:
> > +return None
> > +
> > +return m
> > +
> > +return ParserFn(TextParser_fn=_find_all)
> > +
> 
> > 2.45.2
> >


Re: [PATCH 1/1] dts: add binding to different drivers to TG node

2024-08-12 Thread Nicholas Pratte
Makes sense to me!

Reviewed-by: Nicholas Pratte 

On Mon, Aug 12, 2024 at 1:23 PM  wrote:
>
> From: Jeremy Spewock 
>
> The DTS framework in its current state supports binding ports to
> different drivers on the SUT node but not the TG node. The TG node
> already has the information that it needs about the different drivers
> that it has available in the configuration file, but it did not
> previously have access to the devbind script, so it did not use that
> information for anything.
>
> This patch moves the steps to copy the DPDK tarball into the node class
> rather than the SUT node class, and calls this function on the TG node
> as well as the SUT. It also moves the driver binding step into the Node
> class and triggers the same pattern of binding to ports that existed on
> the SUT on the TG.
>
> Bugzilla ID: 1420
>
> Signed-off-by: Jeremy Spewock 
> ---
>  dts/framework/runner.py |   2 +
>  dts/framework/testbed_model/node.py | 106 +++-
>  dts/framework/testbed_model/sut_node.py |  86 +--
>  3 files changed, 109 insertions(+), 85 deletions(-)
>
> diff --git a/dts/framework/runner.py b/dts/framework/runner.py
> index 6b6f6a05f5..ed9e58b172 100644
> --- a/dts/framework/runner.py
> +++ b/dts/framework/runner.py
> @@ -484,6 +484,7 @@ def _run_build_target(
>
>  try:
>  sut_node.set_up_build_target(build_target_config)
> +tg_node.set_up_build_target(build_target_config)
>  self._result.dpdk_version = sut_node.dpdk_version
>  
> build_target_result.add_build_target_info(sut_node.get_build_target_info())
>  build_target_result.update_setup(Result.PASS)
> @@ -498,6 +499,7 @@ def _run_build_target(
>  try:
>  self._logger.set_stage(DtsStage.build_target_teardown)
>  sut_node.tear_down_build_target()
> +tg_node.tear_down_build_target()
>  build_target_result.update_teardown(Result.PASS)
>  except Exception as e:
>  self._logger.exception("Build target teardown failed.")
> diff --git a/dts/framework/testbed_model/node.py 
> b/dts/framework/testbed_model/node.py
> index 12a40170ac..8e6181e424 100644
> --- a/dts/framework/testbed_model/node.py
> +++ b/dts/framework/testbed_model/node.py
> @@ -13,11 +13,19 @@
>  The :func:`~Node.skip_setup` decorator can be used without subclassing.
>  """
>
> +import os
> +import tarfile
>  from abc import ABC
>  from ipaddress import IPv4Interface, IPv6Interface
> +from pathlib import PurePath
>  from typing import Any, Callable, Union
>
> -from framework.config import OS, NodeConfiguration, TestRunConfiguration
> +from framework.config import (
> +OS,
> +BuildTargetConfiguration,
> +NodeConfiguration,
> +TestRunConfiguration,
> +)
>  from framework.exception import ConfigurationError
>  from framework.logger import DTSLogger, get_dts_logger
>  from framework.settings import SETTINGS
> @@ -58,8 +66,11 @@ class Node(ABC):
>  lcores: list[LogicalCore]
>  ports: list[Port]
>  _logger: DTSLogger
> +_remote_tmp_dir: PurePath
> +__remote_dpdk_dir: PurePath | None
>  _other_sessions: list[OSSession]
>  _test_run_config: TestRunConfiguration
> +_path_to_devbind_script: PurePath | None
>
>  def __init__(self, node_config: NodeConfiguration):
>  """Connect to the node and gather info during initialization.
> @@ -88,6 +99,9 @@ def __init__(self, node_config: NodeConfiguration):
>
>  self._other_sessions = []
>  self._init_ports()
> +self._remote_tmp_dir = self.main_session.get_remote_tmp_dir()
> +self.__remote_dpdk_dir = None
> +self._path_to_devbind_script = None
>
>  def _init_ports(self) -> None:
>  self.ports = [Port(self.name, port_config) for port_config in 
> self.config.ports]
> @@ -95,6 +109,34 @@ def _init_ports(self) -> None:
>  for port in self.ports:
>  self.configure_port_state(port)
>
> +def _guess_dpdk_remote_dir(self) -> PurePath:
> +return self.main_session.guess_dpdk_remote_dir(self._remote_tmp_dir)
> +
> +@property
> +def _remote_dpdk_dir(self) -> PurePath:
> +"""The remote DPDK dir.
> +
> +This internal property should be set after extracting the DPDK 
> tarball. If it's not set,
> +that implies the DPDK setup step has been skipped, in which case we 
> can guess where
> +a previous build was located.
> +"""
> +if self.__remote_dpdk_dir is None:
> +self.__remote_dpdk_dir = self._guess_dpdk_remote_dir()
> +return self.__remote_dpdk_dir
> +
> +@_remote_dpdk_dir.setter
> +def _remote_dpdk_dir(self, value: PurePath) -> None:
> +self.__remote_dpdk_dir = value
> +
> +@property
> +def path_to_devbind_script(self) -> PurePath:
> +"""The path to the dpdk-devbind.py script on the node."""
> +if self.

[PATCH v7 0/3] Independent Enqueue Support

2024-08-12 Thread Abdullah Sevincer
v7: Address documentation reviews.
v6: Update patch with more documentation
v5: Address build issues
v4: Address comments
v3: Fix CI/build issues
v2: Fix CI/build issues
v1: Initial patchset

Abdullah Sevincer (3):
  event/dlb2: add support for independent enqueue
  eventdev: add support for independent enqueue
  event/dsw: add capability for independent enqueue

 doc/guides/eventdevs/dlb2.rst |  41 ++
 doc/guides/eventdevs/features/default.ini |   1 +
 doc/guides/eventdevs/features/dlb2.ini|   1 +
 doc/guides/rel_notes/release_24_11.rst|  34 +-
 drivers/event/dlb2/dlb2.c | 492 ++
 drivers/event/dlb2/dlb2_avx512.c  |  27 +-
 drivers/event/dlb2/dlb2_inline_fns.h  |   8 +
 drivers/event/dlb2/dlb2_priv.h|  25 +-
 drivers/event/dlb2/rte_pmd_dlb2.h |  10 +
 drivers/event/dsw/dsw_evdev.c |   3 +-
 lib/eventdev/rte_eventdev.h   |  37 ++
 11 files changed, 463 insertions(+), 216 deletions(-)

-- 
2.25.1



[PATCH v7 1/3] event/dlb2: add support for independent enqueue

2024-08-12 Thread Abdullah Sevincer
DLB devices need events to be enqueued in the same order they are
dequeued. Applications are not supposed to change event order between
dequeue and enqueue. Since the Eventdev standard does not impose such
a restriction, independent enqueue support is needed in the DLB PMD so
that it restores dequeue order on enqueue if applications happen to
change it. It also adds missing releases in places where events are
dropped by the application and implicit release is expected to handle
them.

By default the feature will be off on all DLB ports, and they will
behave the same as in older releases. To enable the reordering feature,
applications need to add the flag RTE_EVENT_PORT_CFG_INDEPENDENT_ENQ
to the port configuration, but only if the device advertises the
capability RTE_EVENT_DEV_CAP_INDEPENDENT_ENQ.
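
For illustration, a minimal sketch of the intended flow (not part of
the patch; the helper name and device/port ids are placeholders, and
error handling is trimmed):

    #include <rte_eventdev.h>

    /* Hypothetical helper: enable independent enqueue on a port only
     * when the device advertises the capability.
     */
    static int
    setup_port_independent_enq(uint8_t dev_id, uint8_t port_id)
    {
            struct rte_event_dev_info info;
            struct rte_event_port_conf conf;

            if (rte_event_dev_info_get(dev_id, &info) < 0)
                    return -1;
            if (rte_event_port_default_conf_get(dev_id, port_id, &conf) < 0)
                    return -1;

            /* Set the port flag only when the device is capable. */
            if (info.event_dev_cap & RTE_EVENT_DEV_CAP_INDEPENDENT_ENQ)
                    conf.event_port_cfg |= RTE_EVENT_PORT_CFG_INDEPENDENT_ENQ;

            return rte_event_port_setup(dev_id, port_id, &conf);
    }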

Signed-off-by: Abdullah Sevincer 
---
 doc/guides/eventdevs/dlb2.rst  |  41 +++
 doc/guides/rel_notes/release_24_11.rst |  33 +-
 drivers/event/dlb2/dlb2.c  | 492 -
 drivers/event/dlb2/dlb2_avx512.c   |  27 +-
 drivers/event/dlb2/dlb2_inline_fns.h   |   8 +
 drivers/event/dlb2/dlb2_priv.h |  25 +-
 drivers/event/dlb2/rte_pmd_dlb2.h  |  10 +
 7 files changed, 417 insertions(+), 219 deletions(-)

diff --git a/doc/guides/eventdevs/dlb2.rst b/doc/guides/eventdevs/dlb2.rst
index 2532d92888..d74c6f7fd1 100644
--- a/doc/guides/eventdevs/dlb2.rst
+++ b/doc/guides/eventdevs/dlb2.rst
@@ -456,6 +456,47 @@ Example command to enable QE Weight feature:
 
--allow ea:00.0,enable_cq_weight=
 
+Independent Enqueue Capability
+~~
+
+DLB2 hardware device expects all forwarded events to be enqueued in the same
+order as they are dequeued. For dropped events, their releases should come at
+the same location as the original event was expected. Hardware has this
+restriction as it uses the order to retrieve information about the original
+event that was sent to the CPU.  This contains information like atomic flow
+ID to release the flow lock and ordered events sequence number to restore the
+original order.
+
+Some applications, like those based on the DPDK dispatcher library, want
+enqueue order independence. To support this, DLB2 PMD supports the
+``RTE_EVENT_DEV_CAP_INDEPENDENT_ENQ`` capability.
+
+This capability applies to Eventdevs supporting burst mode. On ports where
+the application is going to change enqueue order,
+``RTE_EVENT_PORT_CFG_INDEPENDENT_ENQ`` support should be enabled.
+
+Example code to inform PMD that the application plans to use independent 
enqueue
+order on a port:
+
+.. code-block:: c
+
+   if (capability & RTE_EVENT_DEV_CAP_INDEPENDENT_ENQ)
+ port_config = port_config | RTE_EVENT_PORT_CFG_INDEPENDENT_ENQ;
+
+This code example enables enqueue event reordering inside DLB2 PMD before the 
events
+are sent to the DLB2 hardware. If the application is not going to change the 
enqueue
+order, this flag should not be enabled to get better performance. DLB2 PMD 
saves
+ordering information inside the impl_opaque field of the event, and this field 
should
+be preserved for all FORWARD or RELEASE events. The following macros are provided 
to get
+and set this field inside the event in case the same event is not used for 
forwarding
+(e.g., a new RELEASE event is created when the original event is dropped 
instead of
+reusing the same event).
+
+.. code-block:: c
+
+   #define RTE_EVENT_GET_IMPL_OPAQUE(ev)  (ev->impl_opaque)
+   #define RTE_EVENT_SET_IMPL_OPAQUE(ev, val)  (ev->impl_opaque = val)
+
 Running Eventdev Applications with DLB Device
 -
 
diff --git a/doc/guides/rel_notes/release_24_11.rst 
b/doc/guides/rel_notes/release_24_11.rst
index 0ff70d9057..f0ec07c263 100644
--- a/doc/guides/rel_notes/release_24_11.rst
+++ b/doc/guides/rel_notes/release_24_11.rst
@@ -24,36 +24,11 @@ DPDK Release 24.11
 New Features
 
 
-.. This section should contain new features added in this release.
-   Sample format:
+* **Updated DLB2 Driver for independent enqueue feature**
 
-   * **Add a title in the past tense with a full stop.**
-
- Add a short 1-2 sentence description in the past tense.
- The description should be enough to allow someone scanning
- the release notes to understand the new feature.
-
- If the feature adds a lot of sub-features you can use a bullet list
- like this:
-
- * Added feature foo to do something.
- * Enhanced feature bar to do something else.
-
- Refer to the previous release notes for examples.
-
- Suggested order in release notes items:
- * Core libs (EAL, mempool, ring, mbuf, buses)
- * Device abstraction libs and PMDs (ordered alphabetically by vendor name)
-   - ethdev (lib, PMDs)
-   - cryptodev (lib, PMDs)
-   - eventdev (lib, PMDs)
-   - etc
- * Other libs
- * Apps, Examples, Tools (if significant)
-
- This section is a comment. Do not overwrite or remove it.
- Also, make 

[PATCH v7 2/3] eventdev: add support for independent enqueue

2024-08-12 Thread Abdullah Sevincer
This commit adds support for the independent enqueue feature
and updates the Event Device and PMD feature lists.

A new capability, RTE_EVENT_DEV_CAP_INDEPENDENT_ENQ, is introduced
to let a PMD accept enqueues in any order, even when the underlying
hardware device needs enqueues in a strict dequeue order.

To use this capability, applications need to set the flag
RTE_EVENT_PORT_CFG_INDEPENDENT_ENQ during port setup, but only if the
capability RTE_EVENT_DEV_CAP_INDEPENDENT_ENQ exists.

Signed-off-by: Abdullah Sevincer 
---
 doc/guides/eventdevs/features/default.ini |  1 +
 doc/guides/eventdevs/features/dlb2.ini|  1 +
 doc/guides/rel_notes/release_24_11.rst|  5 +++
 lib/eventdev/rte_eventdev.h   | 37 +++
 4 files changed, 44 insertions(+)

diff --git a/doc/guides/eventdevs/features/default.ini 
b/doc/guides/eventdevs/features/default.ini
index 1cc4303fe5..7c4ee99238 100644
--- a/doc/guides/eventdevs/features/default.ini
+++ b/doc/guides/eventdevs/features/default.ini
@@ -22,6 +22,7 @@ carry_flow_id  =
 maintenance_free   =
 runtime_queue_attr =
 profile_links  =
+independent_enq=
 
 ;
 ; Features of a default Ethernet Rx adapter.
diff --git a/doc/guides/eventdevs/features/dlb2.ini 
b/doc/guides/eventdevs/features/dlb2.ini
index 7b80286927..c7193b47c1 100644
--- a/doc/guides/eventdevs/features/dlb2.ini
+++ b/doc/guides/eventdevs/features/dlb2.ini
@@ -15,6 +15,7 @@ implicit_release_disable   = Y
 runtime_port_link  = Y
 multiple_queue_port= Y
 maintenance_free   = Y
+independent_enq= Y
 
 [Eth Rx adapter Features]
 
diff --git a/doc/guides/rel_notes/release_24_11.rst 
b/doc/guides/rel_notes/release_24_11.rst
index f0ec07c263..04f389876a 100644
--- a/doc/guides/rel_notes/release_24_11.rst
+++ b/doc/guides/rel_notes/release_24_11.rst
@@ -30,6 +30,11 @@ New Features
   ``RTE_EVENT_PORT_CFG_INDEPENDENT_ENQ`` to enable the feature if the 
capability
   ``RTE_EVENT_DEV_CAP_INDEPENDENT_ENQ`` exists.
 
+* **Updated Event Device Library for independent enqueue feature**
+
+  * Added support for independent enqueue feature. Updated Event Device and
+PMD feature list.
+
 
 Removed Items
 -
diff --git a/lib/eventdev/rte_eventdev.h b/lib/eventdev/rte_eventdev.h
index 08e5f9320b..48e6eadda9 100644
--- a/lib/eventdev/rte_eventdev.h
+++ b/lib/eventdev/rte_eventdev.h
@@ -446,6 +446,31 @@ struct rte_event;
  * @see RTE_SCHED_TYPE_PARALLEL
  */
 
+#define RTE_EVENT_DEV_CAP_INDEPENDENT_ENQ  (1ULL << 16)
+/**< Event device is capable of independent enqueue.
+ * A new capability, RTE_EVENT_DEV_CAP_INDEPENDENT_ENQ, will indicate that 
Eventdev
+ * supports the enqueue in any order or specifically in a different order than 
the
+ * dequeue. Eventdev PMD can either transmit events in the changed order in 
which
+ * they are enqueued or restore the original order before sending them to the
+ * underlying hardware device. A flag is provided during the port 
configuration to
+ * inform Eventdev PMD that the application intends to use an independent 
enqueue
+ * order on a particular port. Note that this capability only matters for 
Eventdevs
+ * supporting burst mode.
+ *
+ * To inform PMD that the application plans to use independent enqueue order 
on a port
+ * this code example can be used:
+ *
+ *  if (capability & RTE_EVENT_DEV_CAP_INDEPENDENT_ENQ)
+ * port_config = port_config | RTE_EVENT_PORT_CFG_INDEPENDENT_ENQ;
+ *
+ * When an implicit release is enabled on a port, Eventdev PMD will also handle
+ * the insertion of RELEASE events in place of dropped events. The independent 
enqueue
+ * feature only applies to FORWARD and RELEASE events. New events 
(op=RTE_EVENT_OP_NEW)
+ * will be transmitted in the order the application enqueues them and do not 
maintain
+ * any order relative to FORWARD/RELEASE events. FORWARD vs NEW relaxed 
ordering
+ * only applies to ports that have enabled independent enqueue feature.
+ */
+
 /* Event device priority levels */
 #define RTE_EVENT_DEV_PRIORITY_HIGHEST   0
 /**< Highest priority level for events and queues.
@@ -1072,6 +1097,18 @@ rte_event_queue_attr_set(uint8_t dev_id, uint8_t 
queue_id, uint32_t attr_id,
  *
  *  @see rte_event_port_setup()
  */
+ #define RTE_EVENT_PORT_CFG_INDEPENDENT_ENQ   (1ULL << 5)
+/**< Flag to enable independent enqueue. Must not be set if the device
+ * is not RTE_EVENT_DEV_CAP_INDEPENDENT_ENQ capable. This feature
+ * allows an application to enqueue RTE_EVENT_OP_FORWARD or
+ * RTE_EVENT_OP_RELEASE in an order different than the order the
+ * events were dequeued from the event device, while maintaining
+ * RTE_SCHED_TYPE_ATOMIC or RTE_SCHED_TYPE_ORDERED semantics.
+ *
+ * Note that this flag only matters for Eventdevs supporting burst mode.
+ *
+ *  @see rte_event_port_setup()
+ */
 
 /** Event port configuration structure */
 struct rte_event_port_conf {
-- 
2.25.1



[PATCH v7 3/3] event/dsw: add capability for independent enqueue

2024-08-12 Thread Abdullah Sevincer
To use the independent enqueue capability, applications need to set the
flag RTE_EVENT_PORT_CFG_INDEPENDENT_ENQ during port setup, but only if
the capability RTE_EVENT_DEV_CAP_INDEPENDENT_ENQ exists. Hence, this
commit adds the independent enqueue capability to the DSW driver.

Signed-off-by: Abdullah Sevincer 
---
 doc/guides/rel_notes/release_24_11.rst | 4 
 drivers/event/dsw/dsw_evdev.c  | 3 ++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/doc/guides/rel_notes/release_24_11.rst 
b/doc/guides/rel_notes/release_24_11.rst
index 04f389876a..b8d1f36e54 100644
--- a/doc/guides/rel_notes/release_24_11.rst
+++ b/doc/guides/rel_notes/release_24_11.rst
@@ -35,6 +35,10 @@ New Features
   * Added support for independent enqueue feature. Updated Event Device and
 PMD feature list.
 
+* **Updated DSW Driver for independent enqueue feature**
+
+  * Added capability flag for DSW to advertise independent enqueue feature.
+
 
 Removed Items
 -
diff --git a/drivers/event/dsw/dsw_evdev.c b/drivers/event/dsw/dsw_evdev.c
index 0dea1091e3..5c483d869c 100644
--- a/drivers/event/dsw/dsw_evdev.c
+++ b/drivers/event/dsw/dsw_evdev.c
@@ -230,7 +230,8 @@ dsw_info_get(struct rte_eventdev *dev __rte_unused,
RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE|
RTE_EVENT_DEV_CAP_NONSEQ_MODE|
RTE_EVENT_DEV_CAP_MULTIPLE_QUEUE_PORT|
-   RTE_EVENT_DEV_CAP_CARRY_FLOW_ID
+   RTE_EVENT_DEV_CAP_CARRY_FLOW_ID |
+   RTE_EVENT_DEV_CAP_INDEPENDENT_ENQ
};
 }
 
-- 
2.25.1



Re: [PATCH v4 2/2] dts: mac filter test suite refactored for new dts

2024-08-12 Thread Dean Marx
>
> +"""Mac address filtering test suite.
> +
> +This test suite ensures proper and expected behavior of Allowlist
> filtering via mac
> +addresses on devices bound to the Poll Mode Driver. If a packet received
> on a device
> +contains a mac address not contained with its mac address pool, the
> packet should
> +be dropped. Alternatively, if a packet is received that contains a
> destination mac
> +within the devices address pool, the packet should be accepted and
> forwarded. This
> +behavior should remain consistent across all packets, namely those
> containing dot1q
> +tags or otherwise.
>


This should probably say "not contained within its mac address pool"
instead of "with", since that's how you're wording the rest of the
docstrings



> +Test cases within this suite utilize this method to create, send,
> and verify
> +packets based on criteria relating to the packet's destination
> mac address,
> +vlan tag, and whether or not the packet should be received or
> not. Packets
> +are verified using an inserted payload. Assuming the test case
> expects to
> +receive a specified packet, if the list of received packets
> contains this
> +payload within any of its packets, the test case passes.
> Alternatively, if
> +the designed packet should not be received, and the packet
> payload is not,
> +received, then the test case fails. Each call with this method
> sends exactly
> +one packet.
>


"and whether or not the packet should be received or not" is redundant

Reviewed-by: Dean Marx 


Re: [PATCH v1] dts: add package mode config and updated README

2024-08-12 Thread Nicholas Pratte
Reviewed-by: Nicholas Pratte 

On Mon, Aug 12, 2024 at 12:35 PM Dean Marx  wrote:
>
> In the current DTS setup description, the user installs poetry
> with the --no-root option. However, adding 'package-mode = false'
> to the pyproject.toml sets the same configuration, and running
> poetry install --no-root will become an error in a future
> poetry version.
>
> Signed-off-by: Dean Marx 
> ---
>  dts/README.md  | 4 ++--
>  dts/pyproject.toml | 1 +
>  2 files changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/dts/README.md b/dts/README.md
> index ee3fa1c968..2b3a7f89c5 100644
> --- a/dts/README.md
> +++ b/dts/README.md
> @@ -37,7 +37,7 @@ to allow you to connect to hosts without specifying a 
> password.
>  ```shell
>  docker build --target dev -t dpdk-dts .
>  docker run -v $(pwd)/..:/dpdk -v /home/dtsuser/.ssh:/root/.ssh:ro -it 
> dpdk-dts bash
> -$ poetry install --no-root
> +$ poetry install
>  $ poetry shell
>  ```
>
> @@ -46,7 +46,7 @@ $ poetry shell
>  ```shell
>  docker build --target dev -t dpdk-dts .
>  docker run -v $(pwd)/..:/dpdk -it dpdk-dts bash
> -$ poetry install --no-root
> +$ poetry install
>  $ poetry shell
>  ```
>
> diff --git a/dts/pyproject.toml b/dts/pyproject.toml
> index 0b9b09805a..712af2f01d 100644
> --- a/dts/pyproject.toml
> +++ b/dts/pyproject.toml
> @@ -3,6 +3,7 @@
>  # Copyright(c) 2023 PANTHEON.tech s.r.o.
>
>  [tool.poetry]
> +package-mode = false
>  name = "dts"
>  version = "0.1.0"
>  description = "DPDK Test Suite."
> --
> 2.44.0
>


Re: [PATCH v4 1/2] dts: add methods for setting mac and multicast addresses

2024-08-12 Thread Dean Marx
Reviewed-by: Dean Marx 


Re: [PATCH v1 1/3] bbdev: new queue stat for available enqueue depth

2024-08-12 Thread Maxime Coquelin

Hi Nicolas,

On 8/12/24 19:27, Chautru, Nicolas wrote:

Hi Maxime,

The branch origin/next-baseband-for-main doesn’t have yet the updates from 
main, such as the ./doc/rel_notes for 24.11.
Can you refresh this? Or let me know how best to proceed.


I rebased the branches to latest main, you can proceed.

Thanks,
Maxime


Thanks,
Nic


-Original Message-
From: Maxime Coquelin 
Sent: Monday, August 12, 2024 2:29 AM
To: Chautru, Nicolas ; dev@dpdk.org
Cc: hemant.agra...@nxp.com; Marchand, David
; Vargas, Hernan

Subject: Re: [PATCH v1 1/3] bbdev: new queue stat for available enqueue
depth

Hi Nicolas,

On 4/4/24 23:04, Nicolas Chautru wrote:

Capturing additional queue stats counter for the depth of enqueue
batch still available on the given queue. This can help application to
monitor that depth at run time.

Signed-off-by: Nicolas Chautru 
---
   lib/bbdev/rte_bbdev.h | 2 ++
   1 file changed, 2 insertions(+)

diff --git a/lib/bbdev/rte_bbdev.h b/lib/bbdev/rte_bbdev.h index
0cbfdd1c95..25514c58ac 100644
--- a/lib/bbdev/rte_bbdev.h
+++ b/lib/bbdev/rte_bbdev.h
@@ -283,6 +283,8 @@ struct rte_bbdev_stats {
 * bbdev operation
 */
uint64_t acc_offload_cycles;
+   /** Available number of enqueue batch on that queue. */
+   uint16_t enqueue_depth_avail;
   };

   /**


I think it needs to be documented in the ABI change section.

With that done, feel free to add:

Maxime Coquelin 

Thanks,
Maxime






[PATCH v8 0/3] Independent Enqueue Support

2024-08-12 Thread Abdullah Sevincer
v8: Address build issues.
v7: Address documentation reviews.
v6: Update patch with more documentation
v5: Address build issues
v4: Address comments
v3: Fix CI/build issues
v2: Fix CI/build issues
v1: Initial patchset

Abdullah Sevincer (3):
  event/dlb2: add support for independent enqueue
  eventdev: add support for independent enqueue
  event/dsw: add capability for independent enqueue

 doc/guides/eventdevs/dlb2.rst |  41 ++
 doc/guides/eventdevs/features/default.ini |   1 +
 doc/guides/eventdevs/features/dlb2.ini|   1 +
 doc/guides/rel_notes/release_24_11.rst|  34 +-
 drivers/event/dlb2/dlb2.c | 492 ++
 drivers/event/dlb2/dlb2_avx512.c  |  27 +-
 drivers/event/dlb2/dlb2_inline_fns.h  |   8 +
 drivers/event/dlb2/dlb2_priv.h|  25 +-
 drivers/event/dlb2/rte_pmd_dlb2.h |  10 +
 drivers/event/dsw/dsw_evdev.c |   3 +-
 lib/eventdev/rte_eventdev.h   |  37 ++
 11 files changed, 463 insertions(+), 216 deletions(-)

-- 
2.25.1



[PATCH v8 1/3] event/dlb2: add support for independent enqueue

2024-08-12 Thread Abdullah Sevincer
DLB devices need events to be enqueued in the same order they are
dequeued. Applications are not supposed to change event order between
dequeue and enqueue. Since the Eventdev standard does not impose such
a restriction, independent enqueue support is needed in the DLB PMD so
that it restores dequeue order on enqueue if applications happen to
change it. It also adds missing releases in places where events are
dropped by the application and implicit release is expected to handle
them.

By default the feature will be off on all DLB ports, and they will
behave the same as in older releases. To enable the reordering feature,
applications need to add the flag RTE_EVENT_PORT_CFG_INDEPENDENT_ENQ
to the port configuration, but only if the device advertises the
capability RTE_EVENT_DEV_CAP_INDEPENDENT_ENQ.

Signed-off-by: Abdullah Sevincer 
---
 doc/guides/eventdevs/dlb2.rst  |  41 +++
 doc/guides/rel_notes/release_24_11.rst |  33 +-
 drivers/event/dlb2/dlb2.c  | 492 -
 drivers/event/dlb2/dlb2_avx512.c   |  27 +-
 drivers/event/dlb2/dlb2_inline_fns.h   |   8 +
 drivers/event/dlb2/dlb2_priv.h |  25 +-
 drivers/event/dlb2/rte_pmd_dlb2.h  |  10 +
 7 files changed, 417 insertions(+), 219 deletions(-)

diff --git a/doc/guides/eventdevs/dlb2.rst b/doc/guides/eventdevs/dlb2.rst
index 2532d92888..8b973cf81e 100644
--- a/doc/guides/eventdevs/dlb2.rst
+++ b/doc/guides/eventdevs/dlb2.rst
@@ -456,6 +456,47 @@ Example command to enable QE Weight feature:
 
--allow ea:00.0,enable_cq_weight=
 
+Independent Enqueue Capability
+~~
+
+DLB2 hardware device expects all forwarded events to be enqueued in the same
+order as they are dequeued. For dropped events, their releases should come at
+the same location as the original event was expected. Hardware has this
+restriction as it uses the order to retrieve information about the original
+event that was sent to the CPU.  This contains information like atomic flow
+ID to release the flow lock and ordered events sequence number to restore the
+original order.
+
+Some applications, like those based on the DPDK dispatcher library, want
+enqueue order independence. To support this, DLB2 PMD supports the
+``RTE_EVENT_DEV_CAP_INDEPENDENT_ENQ`` capability.
+
+This capability applies to Eventdevs supporting burst mode. On ports where
+the application is going to change enqueue order,
+``RTE_EVENT_PORT_CFG_INDEPENDENT_ENQ`` support should be enabled.
+
+Example code to inform PMD that the application plans to use independent 
enqueue
+order on a port:
+
+.. code-block:: c
+
+   if (capability & RTE_EVENT_DEV_CAP_INDEPENDENT_ENQ)
+ port_config = port_config | RTE_EVENT_PORT_CFG_INDEPENDENT_ENQ;
+
+This code example enables enqueue event reordering inside DLB2 PMD before the 
events
+are sent to the DLB2 hardware. If the application is not going to change the 
enqueue
+order, this flag should not be enabled to get better performance. DLB2 PMD 
saves
+ordering information inside the impl_opaque field of the event, and this field 
should
+be preserved for all FORWARD or RELEASE events. The following macros are provided 
to get
+and set this field inside the event in case the same event is not used for 
forwarding
+(e.g., a new RELEASE event is created when the original event is dropped 
instead of
+reusing the same event).
+
+.. code-block:: c
+
+   #define RTE_EVENT_GET_IMPL_OPAQUE(ev)  (ev->impl_opaque)
+   #define RTE_EVENT_SET_IMPL_OPAQUE(ev, val)  (ev->impl_opaque = val)
+
 Running Eventdev Applications with DLB Device
 -
 
diff --git a/doc/guides/rel_notes/release_24_11.rst 
b/doc/guides/rel_notes/release_24_11.rst
index 0ff70d9057..f0ec07c263 100644
--- a/doc/guides/rel_notes/release_24_11.rst
+++ b/doc/guides/rel_notes/release_24_11.rst
@@ -24,36 +24,11 @@ DPDK Release 24.11
 New Features
 
 
-.. This section should contain new features added in this release.
-   Sample format:
+* **Updated DLB2 Driver for independent enqueue feature**
 
-   * **Add a title in the past tense with a full stop.**
-
- Add a short 1-2 sentence description in the past tense.
- The description should be enough to allow someone scanning
- the release notes to understand the new feature.
-
- If the feature adds a lot of sub-features you can use a bullet list
- like this:
-
- * Added feature foo to do something.
- * Enhanced feature bar to do something else.
-
- Refer to the previous release notes for examples.
-
- Suggested order in release notes items:
- * Core libs (EAL, mempool, ring, mbuf, buses)
- * Device abstraction libs and PMDs (ordered alphabetically by vendor name)
-   - ethdev (lib, PMDs)
-   - cryptodev (lib, PMDs)
-   - eventdev (lib, PMDs)
-   - etc
- * Other libs
- * Apps, Examples, Tools (if significant)
-
- This section is a comment. Do not overwrite or remove it.
- Also, make

[PATCH v8 2/3] eventdev: add support for independent enqueue

2024-08-12 Thread Abdullah Sevincer
This commit adds support for the independent enqueue feature
and updates the Event Device and PMD feature lists.

A new capability, RTE_EVENT_DEV_CAP_INDEPENDENT_ENQ, is introduced
to let a PMD accept enqueues in any order, even when the underlying
hardware device needs enqueues in a strict dequeue order.

To use this capability, applications need to set the flag
RTE_EVENT_PORT_CFG_INDEPENDENT_ENQ during port setup, but only if the
capability RTE_EVENT_DEV_CAP_INDEPENDENT_ENQ exists.

Signed-off-by: Abdullah Sevincer 
---
 doc/guides/eventdevs/features/default.ini |  1 +
 doc/guides/eventdevs/features/dlb2.ini|  1 +
 doc/guides/rel_notes/release_24_11.rst|  5 +++
 lib/eventdev/rte_eventdev.h   | 37 +++
 4 files changed, 44 insertions(+)

diff --git a/doc/guides/eventdevs/features/default.ini 
b/doc/guides/eventdevs/features/default.ini
index 1cc4303fe5..7c4ee99238 100644
--- a/doc/guides/eventdevs/features/default.ini
+++ b/doc/guides/eventdevs/features/default.ini
@@ -22,6 +22,7 @@ carry_flow_id  =
 maintenance_free   =
 runtime_queue_attr =
 profile_links  =
+independent_enq=
 
 ;
 ; Features of a default Ethernet Rx adapter.
diff --git a/doc/guides/eventdevs/features/dlb2.ini 
b/doc/guides/eventdevs/features/dlb2.ini
index 7b80286927..c7193b47c1 100644
--- a/doc/guides/eventdevs/features/dlb2.ini
+++ b/doc/guides/eventdevs/features/dlb2.ini
@@ -15,6 +15,7 @@ implicit_release_disable   = Y
 runtime_port_link  = Y
 multiple_queue_port= Y
 maintenance_free   = Y
+independent_enq= Y
 
 [Eth Rx adapter Features]
 
diff --git a/doc/guides/rel_notes/release_24_11.rst 
b/doc/guides/rel_notes/release_24_11.rst
index f0ec07c263..04f389876a 100644
--- a/doc/guides/rel_notes/release_24_11.rst
+++ b/doc/guides/rel_notes/release_24_11.rst
@@ -30,6 +30,11 @@ New Features
   ``RTE_EVENT_PORT_CFG_INDEPENDENT_ENQ`` to enable the feature if the 
capability
   ``RTE_EVENT_DEV_CAP_INDEPENDENT_ENQ`` exists.
 
+* **Updated Event Device Library for independent enqueue feature**
+
+  * Added support for independent enqueue feature. Updated Event Device and
+PMD feature list.
+
 
 Removed Items
 -
diff --git a/lib/eventdev/rte_eventdev.h b/lib/eventdev/rte_eventdev.h
index 08e5f9320b..48e6eadda9 100644
--- a/lib/eventdev/rte_eventdev.h
+++ b/lib/eventdev/rte_eventdev.h
@@ -446,6 +446,31 @@ struct rte_event;
  * @see RTE_SCHED_TYPE_PARALLEL
  */
 
+#define RTE_EVENT_DEV_CAP_INDEPENDENT_ENQ  (1ULL << 16)
+/**< Event device is capable of independent enqueue.
+ * A new capability, RTE_EVENT_DEV_CAP_INDEPENDENT_ENQ, will indicate that 
Eventdev
+ * supports the enqueue in any order or specifically in a different order than 
the
+ * dequeue. Eventdev PMD can either transmit events in the changed order in 
which
+ * they are enqueued or restore the original order before sending them to the
+ * underlying hardware device. A flag is provided during the port 
configuration to
+ * inform Eventdev PMD that the application intends to use an independent 
enqueue
+ * order on a particular port. Note that this capability only matters for 
Eventdevs
+ * supporting burst mode.
+ *
+ * To inform PMD that the application plans to use independent enqueue order 
on a port
+ * this code example can be used:
+ *
+ *  if (capability & RTE_EVENT_DEV_CAP_INDEPENDENT_ENQ)
+ * port_config = port_config | RTE_EVENT_PORT_CFG_INDEPENDENT_ENQ;
+ *
+ * When an implicit release is enabled on a port, Eventdev PMD will also handle
+ * the insertion of RELEASE events in place of dropped events. The independent 
enqueue
+ * feature only applies to FORWARD and RELEASE events. New events 
(op=RTE_EVENT_OP_NEW)
+ * will be transmitted in the order the application enqueues them and do not 
maintain
+ * any order relative to FORWARD/RELEASE events. FORWARD vs NEW relaxed 
ordering
+ * only applies to ports that have enabled independent enqueue feature.
+ */
+
 /* Event device priority levels */
 #define RTE_EVENT_DEV_PRIORITY_HIGHEST   0
 /**< Highest priority level for events and queues.
@@ -1072,6 +1097,18 @@ rte_event_queue_attr_set(uint8_t dev_id, uint8_t 
queue_id, uint32_t attr_id,
  *
  *  @see rte_event_port_setup()
  */
+ #define RTE_EVENT_PORT_CFG_INDEPENDENT_ENQ   (1ULL << 5)
+/**< Flag to enable independent enqueue. Must not be set if the device
+ * is not RTE_EVENT_DEV_CAP_INDEPENDENT_ENQ capable. This feature
+ * allows an application to enqueue RTE_EVENT_OP_FORWARD or
+ * RTE_EVENT_OP_RELEASE in an order different than the order the
+ * events were dequeued from the event device, while maintaining
+ * RTE_SCHED_TYPE_ATOMIC or RTE_SCHED_TYPE_ORDERED semantics.
+ *
+ * Note that this flag only matters for Eventdevs supporting burst mode.
+ *
+ *  @see rte_event_port_setup()
+ */
 
 /** Event port configuration structure */
 struct rte_event_port_conf {
-- 
2.25.1



[PATCH v8 3/3] event/dsw: add capability for independent enqueue

2024-08-12 Thread Abdullah Sevincer
To use the independent enqueue capability, applications need to set the
flag RTE_EVENT_PORT_CFG_INDEPENDENT_ENQ during port setup, but only if
the capability RTE_EVENT_DEV_CAP_INDEPENDENT_ENQ exists. Hence, this
commit adds the independent enqueue capability to the DSW driver.

Signed-off-by: Abdullah Sevincer 
---
 doc/guides/rel_notes/release_24_11.rst | 4 
 drivers/event/dsw/dsw_evdev.c  | 3 ++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/doc/guides/rel_notes/release_24_11.rst 
b/doc/guides/rel_notes/release_24_11.rst
index 04f389876a..b8d1f36e54 100644
--- a/doc/guides/rel_notes/release_24_11.rst
+++ b/doc/guides/rel_notes/release_24_11.rst
@@ -35,6 +35,10 @@ New Features
   * Added support for independent enqueue feature. Updated Event Device and
 PMD feature list.
 
+* **Updated DSW Driver for independent enqueue feature**
+
+  * Added capability flag for DSW to advertise independent enqueue feature.
+
 
 Removed Items
 -
diff --git a/drivers/event/dsw/dsw_evdev.c b/drivers/event/dsw/dsw_evdev.c
index 0dea1091e3..5c483d869c 100644
--- a/drivers/event/dsw/dsw_evdev.c
+++ b/drivers/event/dsw/dsw_evdev.c
@@ -230,7 +230,8 @@ dsw_info_get(struct rte_eventdev *dev __rte_unused,
RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE|
RTE_EVENT_DEV_CAP_NONSEQ_MODE|
RTE_EVENT_DEV_CAP_MULTIPLE_QUEUE_PORT|
-   RTE_EVENT_DEV_CAP_CARRY_FLOW_ID
+   RTE_EVENT_DEV_CAP_CARRY_FLOW_ID |
+   RTE_EVENT_DEV_CAP_INDEPENDENT_ENQ
};
 }
 
-- 
2.25.1



Re: [PATCH v1 1/2] dts: add csum HW offload to testpmd shell

2024-08-12 Thread Nicholas Pratte
On Mon, Aug 12, 2024 at 9:39 AM Dean Marx  wrote:
>
> add csum_set_hw method to testpmd shell class. Port over
> set_verbose and port start/stop from queue start/stop suite.
>
> Signed-off-by: Dean Marx 
> ---
>  dts/framework/remote_session/testpmd_shell.py | 94 +++
>  1 file changed, 94 insertions(+)
>
> diff --git a/dts/framework/remote_session/testpmd_shell.py 
> b/dts/framework/remote_session/testpmd_shell.py
> index 43e9f56517..be8fbdc295 100644
> --- a/dts/framework/remote_session/testpmd_shell.py
> +++ b/dts/framework/remote_session/testpmd_shell.py
> @@ -806,6 +806,100 @@ def show_port_stats(self, port_id: int) -> 
> TestPmdPortStats:
>
>  return TestPmdPortStats.parse(output)
>
> +def port_stop(self, port: int, verify: bool = True):
> +"""Stop specified port.
> +
> +Args:
> +port: Specifies the port number to use, must be between 0-32.
> +verify: If :data:`True`, the output of the command is scanned
> +to ensure specified port is stopped. If not, it is considered
> +an error.
> +
> +Raises:
> +InteractiveCommandExecutionError: If `verify` is :data:`True` 
> and the port
> +is not stopped."""
> +port_output = self.send_command(f"port stop {port}")
> +if verify:
> +if "Done" not in port_output:
> +self._logger.debug(f"Failed to stop port {port}: 
> \n{port_output}")
> +raise InteractiveCommandExecutionError(f"Testpmd failed to 
> stop port {port}.")
> +
> +def port_start(self, port: int, verify: bool = True):
> +"""Start specified port.
> +
> +Args:
> +port: Specifies the port number to use, must be between 0-32.
> +verify: If :data:`True`, the output of the command is scanned
> +to ensure specified port is started. If not, it is considered
> +an error.
> +
> +Raises:
> +InteractiveCommandExecutionError: If `verify` is :data:`True` 
> and the port
> +is not started."""
> +port_output = self.send_command(f"port start {port}")
> +if verify:
> +if "Done" not in port_output:
> +self._logger.debug(f"Failed to start port {port}: 
> \n{port_output}")
> +raise InteractiveCommandExecutionError(f"Testpmd failed to 
> start port {port}.")
> +
> +def csum_set_hw(self, layer: str, port_id: int, verify: bool = True) -> 
> None:
> +"""Enables hardware checksum offloading on the specified layer.
> +
> +Args:
> +layer: The layer that checksum offloading should be enabled on.
> +options: tcp, ip, udp, sctp, outer-ip, outer-udp.
> +port_id: The port number to enable checksum offloading on, 
> should be within 0-32.
> +verify: If :data:`True` the output of the command will be 
> scanned in an attempt to
> +verify that checksum offloading was enabled on the port.

I ran into a similar situation with the VLAN offload set command which
requires a string input much like that of the 'layer' parameter here.
While this would technically work, it might be best to create some
kind of type class (or even Flags if at all possible) so that users
can select from a more rigorous list of options when writing test
suites.

For example, instead of passing a string into the method, you have the
user pass in something like csum_set_hw(LayerTypes.ip, port_id = 0)
where 'LayerTypes' is some kind of Flag or user-defined type class.
It's really just a preference, but I do think it makes for more
concise code.
> +
> +Raises:
> +InteractiveCommandExecutionError: If checksum offload is not 
> enabled successfully.
> +"""
> +csum_output = self.send_command(f"csum set {layer} hw {port_id}")
> +if (verify and ("Bad arguments" in csum_output or f"Please stop port 
> {port_id} first")):
> +self._logger.debug(f"Failed to set csum hw mode on port 
> {port_id}:\n{csum_output}")
> +raise InteractiveCommandExecutionError(
> +f"Failed to set csum hw mode on port {port_id}"
> +)
> +if verify and f"checksum offload is not supported by port {port_id}" 
> in csum_output:
> +self._logger.debug(f"Checksum {layer} offload is not supported 
> by port {port_id}:\n{csum_output}")
> +
> +success = False
> +if "outer-ip" in layer:
> +if "Outer-Ip checksum offload is hw" in csum_output:
> +success = True
> +if "outer-udp" in layer:
> +if "Outer-Udp checksum offload is hw" in csum_output:
> +success = True
> +else:
> +if f"{layer.upper} checksum offload is hw" in csum_output:
> +success = True
> +if not success and verify:
> +self._logger.debug(f"Failed to set csum hw m

RE: [PATCH] net/ena: revert redefining memcpy

2024-08-12 Thread Wathsala Wathawana Vithanage


> Subject: [PATCH] net/ena: revert redefining memcpy
> 
> Redefining memcpy as rte_memcpy has no performance gain on current
> compilers, and introduced bugs like this one where
> rte_memcpy() will be detected as referencing past the destination.
> 
> Bugzilla ID: 1510
> Fixes: 142778b3702a ("net/ena: switch memcpy to optimized version")

We made similar observations on Arm when the RTE_ARCH_ARM64_MEMCPY flag is on.

Acked-by: Wathsala Vithanage 

> 
> Signed-off-by: Stephen Hemminger 
> ---
>  drivers/net/ena/base/ena_plat_dpdk.h | 9 +
>  1 file changed, 1 insertion(+), 8 deletions(-)
> 
> diff --git a/drivers/net/ena/base/ena_plat_dpdk.h
> b/drivers/net/ena/base/ena_plat_dpdk.h
> index 21b96113c7..291db6cabe 100644
> --- a/drivers/net/ena/base/ena_plat_dpdk.h
> +++ b/drivers/net/ena/base/ena_plat_dpdk.h
> @@ -26,7 +26,6 @@
>  #include 
> 
>  #include 
> -#include 
> 
>  typedef uint64_t u64;
>  typedef uint32_t u32;
> @@ -70,13 +69,7 @@ typedef uint64_t dma_addr_t;  #define
> ENA_UDELAY(x) rte_delay_us_block(x)
> 
>  #define ENA_TOUCH(x) ((void)(x))
> -/* Redefine memcpy with caution: rte_memcpy can be simply aliased to
> memcpy, so
> - * make the redefinition only if it's safe (and beneficial) to do so.
> - */
> -#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64_MEMCPY) ||
> defined(RTE_ARCH_ARM_NEON_MEMCPY) -#undef memcpy -#define
> memcpy rte_memcpy -#endif
> +
>  #define wmb rte_wmb
>  #define rmb rte_rmb
>  #define mb rte_mb
> --
> 2.43.0



Re: [RFC v1 0/2] dts: initial checksum offload suite

2024-08-12 Thread Jeremy Spewock
Hey Dean,

Thanks for the patch, I left some thoughts on the point you raise
below and some of the larger things I noticed throughout the series.
The logic seems to all be right though and I can understand what you
were going for, so the broader ideas seem good to me.

On Mon, Aug 12, 2024 at 9:41 AM Dean Marx  wrote:
>
> Test suite for verifying checksum hardware offload through the
> PMD works as expected. This is done by checking the verbose output in
> testpmd while in csum forwarding mode, specifically the ol_flags
> section, to ensure they match the flags in the test plan. However,
> there are a few issues I noticed while writing the suite that made
> me hesitant to submit a patch:
>
> 1. SCTP hardware offload is not supported on any of the NICs I tested
> on. I've tried this on mlx5, i40e, and bnxt drivers and none of them
> support it. SCTP offload is used as part of almost every test case, so I
> removed SCTP packets from the suite entirely. I intend to keep it that
> way unless anyone is able to use the command "csum set sctp hw 0"
> without an "SCTP not supported" error.
> 2. There are two Tx checksum test cases, which involve checking the Tx
> flags section of verbose output to ensure they match the ones in the
> test plan. However, the Tx flags don't appear to change at all
> depending on what packet you send to testpmd, which leaves me with no
> way to verify correct behavior. I'm considering removing the Tx cases
> entirely, but they are a large chunk of the suite so if anyone disagrees
> I can look for more of a workaround.
>

As for the Tx issue, I have also noticed that these flags are
inconsistent. It looks like the Tx side of things has flags
representing whether there is checksum data for the various layers, but
even then they don't show up in the verbose output when there is
verbose information. I think that if it isn't consistent then we can't
write a test suite that is generic enough to be run on the hardware,
and it probably makes more sense to just leave it out of the suite.
I'm honestly not sure if it is a bug or just a misunderstanding about
what these flags should be doing, since the Tx flags are much less
clear to me than the Rx ones. Regardless, it's not something we can
search for with a capability, so we don't have a great mechanism for
getting this to work if it truly is a case of different drivers doing
it differently.


> If anyone has any comments or advice about the issues above it is
> greatly appreciated.
>
> Dean Marx (2):
>   dts: add csum HW offload to testpmd shell
>   dts: checksum offload test suite
>
>  dts/framework/config/conf_yaml_schema.json|   3 +-
>  dts/framework/remote_session/testpmd_shell.py |  94 ++
>  dts/tests/TestSuite_checksum_offload.py   | 288 ++
>  3 files changed, 384 insertions(+), 1 deletion(-)
>  create mode 100644 dts/tests/TestSuite_checksum_offload.py
>
> --
> 2.44.0
>


Re: [RFC v1 1/2] dts: add csum HW offload to testpmd shell

2024-08-12 Thread Jeremy Spewock
On Mon, Aug 12, 2024 at 9:41 AM Dean Marx  wrote:
>
> add csum_set_hw method to testpmd shell class. Port over
> set_verbose and port start/stop from queue start/stop suite.
>
> Signed-off-by: Dean Marx 
> ---

> +def csum_set_hw(self, layer: str, port_id: int, verify: bool = True) -> 
> None:
> +"""Enables hardware checksum offloading on the specified layer.
> +
> +Args:
> +layer: The layer that checksum offloading should be enabled on.
> +options: tcp, ip, udp, sctp, outer-ip, outer-udp.

If there are specific options like this, it might be worth making this
parameter into a flag where users can specify one or more of the
options and testpmd can handle setting them all. That way users don't
even have to read the doc-string for the options; it will be apparent
in the class they have to use.

> +port_id: The port number to enable checksum offloading on, 
> should be within 0-32.
> +verify: If :data:`True` the output of the command will be 
> scanned in an attempt to
> +verify that checksum offloading was enabled on the port.
> +
> +Raises:
> +InteractiveCommandExecutionError: If checksum offload is not 
> enabled successfully.
> +"""
> +csum_output = self.send_command(f"csum set {layer} hw {port_id}")
> +if (verify and ("Bad arguments" in csum_output or f"Please stop port 
> {port_id} first")):
> +self._logger.debug(f"Failed to set csum hw mode on port 
> {port_id}:\n{csum_output}")
> +raise InteractiveCommandExecutionError(
> +f"Failed to set csum hw mode on port {port_id}"
> +)
> +if verify and f"checksum offload is not supported by port {port_id}" 
> in csum_output:
> +self._logger.debug(f"Checksum {layer} offload is not supported 
> by port {port_id}:\n{csum_output}")
> +
> +success = False
> +if "outer-ip" in layer:
> +if "Outer-Ip checksum offload is hw" in csum_output:
> +success = True
> +if "outer-udp" in layer:
> +if "Outer-Udp checksum offload is hw" in csum_output:
> +success = True
> +else:
> +if f"{layer.upper} checksum offload is hw" in csum_output:
> +success = True
> +if not success and verify:
> +self._logger.debug(f"Failed to set csum hw mode on port 
> {port_id}:\n{csum_output}")
> +

> --
> 2.44.0
>


Re: [RFC v1 2/2] dts: checksum offload test suite

2024-08-12 Thread Jeremy Spewock
On Mon, Aug 12, 2024 at 9:41 AM Dean Marx  wrote:
>
> test suite for verifying layer 3/4 checksum offload
> features on poll mode driver.
>
> Depends-on: patch-142762
> ("dts: add text parser for testpmd verbose output")
> Depends-on: patch-142691
> ("dts: add send_packets to test suites and rework packet addressing")
>
> Signed-off-by: Dean Marx 
> ---
>  dts/framework/config/conf_yaml_schema.json|   3 +-
>  dts/framework/remote_session/testpmd_shell.py |   2 +-
>  dts/tests/TestSuite_checksum_offload.py   | 288 ++
>  3 files changed, 291 insertions(+), 2 deletions(-)
>  create mode 100644 dts/tests/TestSuite_checksum_offload.py
>
> diff --git a/dts/framework/config/conf_yaml_schema.json 
> b/dts/framework/config/conf_yaml_schema.json
> index f02a310bb5..a83a6786df 100644
> --- a/dts/framework/config/conf_yaml_schema.json
> +++ b/dts/framework/config/conf_yaml_schema.json
> @@ -187,7 +187,8 @@
>"enum": [
>  "hello_world",
>  "os_udp",
> -"pmd_buffer_scatter"
> +"pmd_buffer_scatter",
> +"checksum_offload"
>]
>  },
>  "test_target": {
> diff --git a/dts/framework/remote_session/testpmd_shell.py 
> b/dts/framework/remote_session/testpmd_shell.py
> index be8fbdc295..01f378acc3 100644
> --- a/dts/framework/remote_session/testpmd_shell.py
> +++ b/dts/framework/remote_session/testpmd_shell.py
> @@ -856,7 +856,7 @@ def csum_set_hw(self, layer: str, port_id: int, verify: 
> bool = True) -> None:
>  InteractiveCommandExecutionError: If checksum offload is not 
> enabled successfully.
>  """
>  csum_output = self.send_command(f"csum set {layer} hw {port_id}")
> -if (verify and ("Bad arguments" in csum_output or f"Please stop port 
> {port_id} first")):
> +if (verify and ("Bad arguments" in csum_output or f"Please stop port 
> {port_id} first" in csum_output)):
>  self._logger.debug(f"Failed to set csum hw mode on port 
> {port_id}:\n{csum_output}")
>  raise InteractiveCommandExecutionError(
>  f"Failed to set csum hw mode on port {port_id}"

It looks like some testpmd changes snuck into the git history for your
test suite; these probably belong in the previous commit.


> +
> +def test_no_insert_checksums(self) -> None:
> +"""Enable checksum offload insertion and verify packet reception."""
> +packet_list = [
> +Ether() / IP() / UDP() / Raw("x"),
> +Ether() / IP() / TCP() / Raw("x"),

In cases like this where the payload is used so many times, it
probably makes more sense to pull it out into its own variable.

> +Ether() / IPv6(src="::1") / UDP() / Raw("x"),
> +Ether() / IPv6(src="::1") / TCP() / Raw("x"),
> +]
> +with TestPmdShell(node=self.sut_node, enable_rx_cksum=True) as 
> testpmd:
> +testpmd.set_forward_mode(SimpleForwardingModes.csum)
> +testpmd.set_verbose(level=1)
> +testpmd.start()
> +self.send_packet_and_verify(packet_list=packet_list, 
> load="x", should_receive=True)
> +for i in range(0, len(packet_list)):
> +self.send_packet_and_verify_checksum(
> +packet=packet_list[i], goodL4=True, goodIP=True, 
> testpmd=testpmd
> +)
> +

> 2.44.0
>


[PATCH] power: don't disable all cast qualifier warnings

2024-08-12 Thread Stephen Hemminger
Only in one place does the power library need to un-constify a pointer,
and this can be done by casting to uintptr_t first. Better to have
the warning enabled across the rest of the code.
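
A minimal sketch of the pattern (the variable and lookup here are
hypothetical, not taken from the patch):

    const struct cb_data *cb = lookup_cb();  /* hypothetical const pointer */

    rte_free((void *)cb);            /* -Wcast-qual: cast discards 'const' */
    rte_free((void *)(uintptr_t)cb); /* pointer-to-integer round trip drops
                                      * the qualifier without a warning */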

Signed-off-by: Stephen Hemminger 
---
 lib/power/meson.build  | 4 +---
 lib/power/rte_power_pmd_mgmt.c | 2 +-
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/lib/power/meson.build b/lib/power/meson.build
index b8426589b2..2f0f3d26e9 100644
--- a/lib/power/meson.build
+++ b/lib/power/meson.build
@@ -30,7 +30,5 @@ headers = files(
 'rte_power_pmd_mgmt.h',
 'rte_power_uncore.h',
 )
-if cc.has_argument('-Wno-cast-qual')
-cflags += '-Wno-cast-qual'
-endif
+
 deps += ['timer', 'ethdev']
diff --git a/lib/power/rte_power_pmd_mgmt.c b/lib/power/rte_power_pmd_mgmt.c
index b1c18a5f56..6dec282054 100644
--- a/lib/power/rte_power_pmd_mgmt.c
+++ b/lib/power/rte_power_pmd_mgmt.c
@@ -664,7 +664,7 @@ rte_power_ethdev_pmgmt_queue_disable(unsigned int lcore_id,
 * ports before calling any of these API's, so we can assume that the
 * callbacks can be freed. we're intentionally casting away const-ness.
 */
-   rte_free((void *)queue_cfg->cb);
+   rte_free((void *)(uintptr_t)queue_cfg->cb);
free(queue_cfg);
 
return 0;
-- 
2.43.0



[PATCH v2 0/3] bbdev: additional queue stats

2024-08-12 Thread Nicolas Chautru
v2: update to the ABI doc as suggested by Maxime.

This series introduces a new parameter in the queue stats
which can be used to monitor the number of enqueue operations
still possible on a given queue.
The acc PMDs are then refactored to use a set of common functions
to update several queue status parameters, including the new one.
The test application is also updated.
Thanks
Nic

Nicolas Chautru (3):
  bbdev: new queue stat for available enqueue depth
  baseband/acc: refactor queue status update
  test/bbdev: update for queue stats

 app/test-bbdev/test_bbdev_perf.c   |  1 +
 doc/guides/rel_notes/release_24_11.rst |  3 ++
 drivers/baseband/acc/acc_common.h  | 18 
 drivers/baseband/acc/rte_acc100_pmd.c  | 45 ++-
 drivers/baseband/acc/rte_vrb_pmd.c | 61 --
 lib/bbdev/rte_bbdev.h  |  2 +
 6 files changed, 56 insertions(+), 74 deletions(-)

-- 
2.34.1



[PATCH v2 1/3] bbdev: new queue stat for available enqueue depth

2024-08-12 Thread Nicolas Chautru
Capture an additional queue stats counter for the enqueue
batch depth still available on the given queue. This can help
the application monitor that depth at run time.
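
A minimal usage sketch (assumes device 0 is an already configured bbdev
device; error handling elided):

    uint16_t dev_id = 0;            /* assumed device id */
    struct rte_bbdev_stats stats;

    if (rte_bbdev_stats_get(dev_id, &stats) == 0)
        /* Batches that can still be enqueued before the queue fills up. */
        printf("enqueue depth available: %u\n", stats.enqueue_depth_avail);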

Signed-off-by: Nicolas Chautru 
---
 doc/guides/rel_notes/release_24_11.rst | 3 +++
 lib/bbdev/rte_bbdev.h  | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/doc/guides/rel_notes/release_24_11.rst 
b/doc/guides/rel_notes/release_24_11.rst
index 0ff70d9057..a45b9b2dc6 100644
--- a/doc/guides/rel_notes/release_24_11.rst
+++ b/doc/guides/rel_notes/release_24_11.rst
@@ -88,6 +88,9 @@ API Changes
 ABI Changes
 ---
 
+  * bbdev: Structure ``rte_bbdev_stats`` was updated to add a new field
+to optionally report the available enqueue depth, ``enqueue_depth_avail``.
+
 .. This section should contain ABI changes. Sample format:
 
* sample: Add a short 1-2 sentence description of the ABI change
diff --git a/lib/bbdev/rte_bbdev.h b/lib/bbdev/rte_bbdev.h
index 0cbfdd1c95..25514c58ac 100644
--- a/lib/bbdev/rte_bbdev.h
+++ b/lib/bbdev/rte_bbdev.h
@@ -283,6 +283,8 @@ struct rte_bbdev_stats {
 * bbdev operation
 */
uint64_t acc_offload_cycles;
+   /** Number of enqueue batches still available on that queue. */
+   uint16_t enqueue_depth_avail;
 };
 
 /**
-- 
2.34.1



[PATCH v2 3/3] test/bbdev: update for queue stats

2024-08-12 Thread Nicolas Chautru
Update the test application to include the queue stats for the
enqueue_depth_avail counter.

Signed-off-by: Nicolas Chautru 
---
 app/test-bbdev/test_bbdev_perf.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c
index 3a94f15a30..259192c670 100644
--- a/app/test-bbdev/test_bbdev_perf.c
+++ b/app/test-bbdev/test_bbdev_perf.c
@@ -5816,6 +5816,7 @@ get_bbdev_queue_stats(uint16_t dev_id, uint16_t queue_id,
stats->enqueue_warn_count = q_stats->enqueue_warn_count;
stats->dequeue_warn_count = q_stats->dequeue_warn_count;
stats->acc_offload_cycles = q_stats->acc_offload_cycles;
+   stats->enqueue_depth_avail = q_stats->enqueue_depth_avail;
 
return 0;
 }
-- 
2.34.1



[PATCH v2 2/3] baseband/acc: refactor queue status update

2024-08-12 Thread Nicolas Chautru
Introduce common functions for queue stats updates
within the acc PMDs.
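
The resulting call pattern in an enqueue path, sketched with a
hypothetical device-specific helper:

    static uint16_t
    pmd_enqueue(struct rte_bbdev_queue_data *q_data,
            struct rte_bbdev_enc_op **ops, uint16_t num)
    {
        uint16_t i = do_device_enqueue(ops, num);  /* hypothetical */

        /* One helper call replaces the open-coded updates of
         * enqueued_count, enqueue_err_count and enqueue_depth_avail.
         */
        acc_update_qstat_enqueue(q_data, i, num - i);
        return i;
    }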

Signed-off-by: Nicolas Chautru 
---
 drivers/baseband/acc/acc_common.h | 18 
 drivers/baseband/acc/rte_acc100_pmd.c | 45 ++--
 drivers/baseband/acc/rte_vrb_pmd.c| 61 +--
 3 files changed, 50 insertions(+), 74 deletions(-)

diff --git a/drivers/baseband/acc/acc_common.h 
b/drivers/baseband/acc/acc_common.h
index e249f37e38..06a88360de 100644
--- a/drivers/baseband/acc/acc_common.h
+++ b/drivers/baseband/acc/acc_common.h
@@ -1555,6 +1555,24 @@ acc_aq_avail(struct rte_bbdev_queue_data *q_data, uint16_t num_ops)
return aq_avail;
 }
 
+/* Update queue stats during enqueue. */
+static inline void
+acc_update_qstat_enqueue(struct rte_bbdev_queue_data *q_data,
+   uint16_t enq_count, uint16_t enq_err_count)
+{
+   q_data->queue_stats.enqueued_count += enq_count;
+   q_data->queue_stats.enqueue_err_count += enq_err_count;
+   q_data->queue_stats.enqueue_depth_avail = acc_aq_avail(q_data, 0);
+}
+
+/* Update queue stats during dequeue. */
+static inline void
+acc_update_qstat_dequeue(struct rte_bbdev_queue_data *q_data, uint16_t deq_count)
+{
+   q_data->queue_stats.dequeued_count += deq_count;
+   q_data->queue_stats.enqueue_depth_avail = acc_aq_avail(q_data, 0);
+}
+
 /* Calculates number of CBs in processed encoder TB based on 'r' and input
  * length.
  */
diff --git a/drivers/baseband/acc/rte_acc100_pmd.c 
b/drivers/baseband/acc/rte_acc100_pmd.c
index ab69350080..d2a0a36e12 100644
--- a/drivers/baseband/acc/rte_acc100_pmd.c
+++ b/drivers/baseband/acc/rte_acc100_pmd.c
@@ -892,6 +892,7 @@ acc100_queue_stop(struct rte_bbdev *dev, uint16_t queue_id)
dev->data->queues[queue_id].queue_stats.dequeue_err_count = 0;
dev->data->queues[queue_id].queue_stats.enqueue_warn_count = 0;
dev->data->queues[queue_id].queue_stats.dequeue_warn_count = 0;
+   dev->data->queues[queue_id].queue_stats.enqueue_depth_avail = 0;
 
return 0;
 }
@@ -3196,9 +3197,7 @@ acc100_enqueue_enc_cb(struct rte_bbdev_queue_data *q_data,
 
acc_dma_enqueue(q, i, &q_data->queue_stats);
 
-   /* Update stats */
-   q_data->queue_stats.enqueued_count += i;
-   q_data->queue_stats.enqueue_err_count += num - i;
+   acc_update_qstat_enqueue(q_data, i, num - i);
return i;
 }
 
@@ -3245,9 +3244,7 @@ acc100_enqueue_ldpc_enc_cb(struct rte_bbdev_queue_data *q_data,
 
acc_dma_enqueue(q, desc_idx, &q_data->queue_stats);
 
-   /* Update stats */
-   q_data->queue_stats.enqueued_count += i;
-   q_data->queue_stats.enqueue_err_count += num - i;
+   acc_update_qstat_enqueue(q_data, i, num - i);
 
return i;
 }
@@ -3284,9 +3281,7 @@ acc100_enqueue_enc_tb(struct rte_bbdev_queue_data *q_data,
 
acc_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats);
 
-   /* Update stats */
-   q_data->queue_stats.enqueued_count += i;
-   q_data->queue_stats.enqueue_err_count += num - i;
+   acc_update_qstat_enqueue(q_data, i, num - i);
 
return i;
 }
@@ -3322,9 +3317,7 @@ acc100_enqueue_ldpc_enc_tb(struct rte_bbdev_queue_data *q_data,
 
acc_dma_enqueue(q, enqueued_descs, &q_data->queue_stats);
 
-   /* Update stats. */
-   q_data->queue_stats.enqueued_count += i;
-   q_data->queue_stats.enqueue_err_count += num - i;
+   acc_update_qstat_enqueue(q_data, i, num - i);
 
return i;
 }
@@ -3388,9 +3381,7 @@ acc100_enqueue_dec_cb(struct rte_bbdev_queue_data *q_data,
 
acc_dma_enqueue(q, i, &q_data->queue_stats);
 
-   /* Update stats */
-   q_data->queue_stats.enqueued_count += i;
-   q_data->queue_stats.enqueue_err_count += num - i;
+   acc_update_qstat_enqueue(q_data, i, num - i);
 
return i;
 }
@@ -3426,9 +3417,7 @@ acc100_enqueue_ldpc_dec_tb(struct rte_bbdev_queue_data *q_data,
 
acc_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats);
 
-   /* Update stats */
-   q_data->queue_stats.enqueued_count += i;
-   q_data->queue_stats.enqueue_err_count += num - i;
+   acc_update_qstat_enqueue(q_data, i, num - i);
return i;
 }
 
@@ -3468,9 +3457,7 @@ acc100_enqueue_ldpc_dec_cb(struct rte_bbdev_queue_data *q_data,
 
acc_dma_enqueue(q, i, &q_data->queue_stats);
 
-   /* Update stats */
-   q_data->queue_stats.enqueued_count += i;
-   q_data->queue_stats.enqueue_err_count += num - i;
+   acc_update_qstat_enqueue(q_data, i, num - i);
return i;
 }
 
@@ -3505,9 +3492,7 @@ acc100_enqueue_dec_tb(struct rte_bbdev_queue_data *q_data,
 
acc_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats);
 
-   /* Update stats */
-   q_data->queue_stats.enqueued_count += i;
-   q_data->queue_stats.enqueue_err_count += num - i;
+   acc_update_qstat_enqueue(q_data, i, num - i);
 
return i;
 }
@@ -3897,8 +3882,7 @@ acc100_dequeue_enc(struct rte_bbdev_queue_data *q_data,
  

RE: [PATCH] power: don't disable all cast qualifier warnings

2024-08-12 Thread Morten Brørup
> From: Stephen Hemminger [mailto:step...@networkplumber.org]
> 
> Only in one place does the power library need to un-constify a pointer,
> and this can be done by casting to uintptr_t first. Better to have
> the warning enabled across the rest of the code.
> 
> Signed-off-by: Stephen Hemminger 
> ---

Agree; disabling warnings should be avoided.

Acked-by: Morten Brørup