Re: [dpdk-dev] [PATCH 19.02 2/2] mem: use memfd for no-huge mode
On 28-Nov-18 4:57 AM, Tiwei Bie wrote: On Tue, Nov 13, 2018 at 05:54:48PM +, Anatoly Burakov wrote: When running in no-huge mode, we anonymously allocate our memory. While this works for regular NICs and vdev's, it's not suitable for memory sharing scenarios such as virtio with vhost_user backend. To fix this, allocate no-huge memory using memfd, and register it with memalloc just like any other memseg fd. This will enable using rte_memseg_get_fd() API with --no-huge EAL flag. Signed-off-by: Anatoly Burakov --- lib/librte_eal/linuxapp/eal/eal_memory.c | 46 ++-- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c index 48b23ce19..8feac2c56 100644 --- a/lib/librte_eal/linuxapp/eal/eal_memory.c +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c @@ -25,6 +25,7 @@ #include #include #include +#include #ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES #include #include @@ -1345,12 +1346,15 @@ eal_legacy_hugepage_init(void) /* hugetlbfs can be disabled */ if (internal_config.no_hugetlbfs) { struct rte_memseg_list *msl; + int n_segs, cur_seg, fd, memfd, flags; uint64_t page_sz; - int n_segs, cur_seg; /* nohuge mode is legacy mode */ internal_config.legacy_mem = 1; + /* nohuge mode is single-file segments mode */ + internal_config.single_file_segments = 1; + /* create a memseg list */ msl = &mcfg->memsegs[0]; @@ -1363,8 +1367,36 @@ eal_legacy_hugepage_init(void) return -1; } + /* set up parameters for anonymous mmap */ + fd = -1; + flags = MAP_PRIVATE | MAP_ANONYMOUS; + + /* create a memfd and store it in the segment fd table */ + memfd = memfd_create("nohuge", 0); + if (memfd < 0) { + RTE_LOG(ERR, EAL, "Cannot create memfd: %s\n", + strerror(errno)); + RTE_LOG(ERR, EAL, "Falling back to anonymous map\n"); + } else { + /* we got an fd - now resize it */ + if (ftruncate(memfd, internal_config.memory) < 0) { + RTE_LOG(ERR, EAL, "Cannot resize memfd: %s\n", + strerror(errno)); + RTE_LOG(ERR, EAL, "Falling back to anonymous map\n"); + close(memfd); + } else { + /* creating memfd-backed file was successful. +* we want changes to memfd to be visible to +* other processes (such as vhost backend), so +* map it as shared memory. +*/ + RTE_LOG(DEBUG, EAL, "Using memfd for anonymous memory\n"); + fd = memfd; + flags = MAP_SHARED; + } + } addr = mmap(NULL, internal_config.memory, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + flags, fd, 0); if (addr == MAP_FAILED) { RTE_LOG(ERR, EAL, "%s: mmap() failed: %s\n", __func__, strerror(errno)); @@ -1375,6 +1407,16 @@ eal_legacy_hugepage_init(void) msl->socket_id = 0; msl->len = internal_config.memory; + /* we're in single-file segments mode, so only the segment list +* fd needs to be set up. +*/ + if (fd != -1) { + if (eal_memalloc_set_seg_list_fd(0, fd) < 0) { + RTE_LOG(ERR, EAL, "Cannot set up segment list fd\n"); + /* not a serious error, proceed */ + } + } Hi Anatoly, Thanks for the work! It seems the support for getting fd offset is missing in no-huge mode. I got below error in virtio-user while trying this series with --no-huge: update_memory_region(): Failed to get offset, ms=0x10002e000 rte_errno=19 That's weird, it should have been working. I'll look into it, thanks! Thanks + /* populate memsegs. each memseg is one page long */ for (cur_seg = 0; cur_seg < n_segs; cur_seg++) { arr = &msl->memseg_arr; -- 2.17.1 -- Thanks, Anatoly
[dpdk-dev] [PATCH v1] net/i40e: fix RX/TX setup when restart port
Before this patch, there are two functions that will clear RX/TX queues number: rte_eth_dev_close() and i40e_dev_free_queues(). But if also i40e_dev_free_queues() clear it, RX/TX queues will not set up correctly when restart port. Fixes: 6b4537128394 ("i40e: free queue memory when closing") Signed-off-by: Zhirun Yan Signed-off-by: Haiyue Wang --- drivers/net/i40e/i40e_rxtx.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c index e1152ff0e..cc953ad58 100644 --- a/drivers/net/i40e/i40e_rxtx.c +++ b/drivers/net/i40e/i40e_rxtx.c @@ -2753,7 +2753,6 @@ i40e_dev_free_queues(struct rte_eth_dev *dev) i40e_dev_rx_queue_release(dev->data->rx_queues[i]); dev->data->rx_queues[i] = NULL; } - dev->data->nb_rx_queues = 0; for (i = 0; i < dev->data->nb_tx_queues; i++) { if (!dev->data->tx_queues[i]) @@ -2761,7 +2760,6 @@ i40e_dev_free_queues(struct rte_eth_dev *dev) i40e_dev_tx_queue_release(dev->data->tx_queues[i]); dev->data->tx_queues[i] = NULL; } - dev->data->nb_tx_queues = 0; } #define I40E_FDIR_NUM_TX_DESC I40E_MIN_RING_DESC -- 2.17.1
[dpdk-dev] [PATCH v1] net/i40e: fix VF port reset
Port reset will call i40evf_uninit_vf() to release resource. It wants to call i40evf_dev_close() to do some clean work. Before this patch, port reset will never call i40evf_dev_close() to shutdown adminq. So the i40evf_dev_init() will failed. Fixs: 18599342024("net/i40e: fix VF add/del MAC") Signed-off-by: Zhirun Yan Signed-off-by: Haiyue Wang --- drivers/net/i40e/i40e_ethdev_vf.c | 28 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/net/i40e/i40e_ethdev_vf.c b/drivers/net/i40e/i40e_ethdev_vf.c index ae55b9b18..cecedc91f 100644 --- a/drivers/net/i40e/i40e_ethdev_vf.c +++ b/drivers/net/i40e/i40e_ethdev_vf.c @@ -1924,15 +1924,12 @@ static int i40evf_dev_start(struct rte_eth_dev *dev) { struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); - struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; uint32_t intr_vector = 0; PMD_INIT_FUNC_TRACE(); - hw->adapter_stopped = 0; - vf->max_pkt_len = dev->data->dev_conf.rxmode.max_rx_pkt_len; vf->num_queue_pairs = RTE_MAX(dev->data->nb_rx_queues, dev->data->nb_tx_queues); @@ -2004,7 +2001,6 @@ i40evf_dev_stop(struct rte_eth_dev *dev) { struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; - struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); PMD_INIT_FUNC_TRACE(); @@ -2012,8 +2008,6 @@ i40evf_dev_stop(struct rte_eth_dev *dev) if (dev->data->dev_conf.intr_conf.rxq != 0) rte_intr_disable(intr_handle); - if (hw->adapter_stopped == 1) - return; i40evf_stop_queues(dev); i40evf_disable_queues_intr(dev); i40e_dev_clear_queues(dev); @@ -2029,7 +2023,6 @@ i40evf_dev_stop(struct rte_eth_dev *dev) /* remove all multicast addresses */ i40evf_add_del_mc_addr_list(dev, vf->mc_addrs, vf->mc_addrs_num, FALSE); - hw->adapter_stopped = 1; } @@ -2245,21 +2238,32 @@ static void i40evf_dev_close(struct rte_eth_dev *dev) { struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); + + if (dev->data->dev_started != 0) { + dev->data->dev_started = 0; + i40evf_dev_stop(dev); + } - i40evf_dev_stop(dev); i40e_dev_free_queues(dev); /* * disable promiscuous mode before reset vf * it is a workaround solution when work with kernel driver * and it is not the normal way +* +* If VF is reset, AQTX: head overrun at 0xdeadbeef > 32 */ - i40evf_dev_promiscuous_disable(dev); - i40evf_dev_allmulticast_disable(dev); + if (!vf->vf_reset) { + i40evf_dev_promiscuous_disable(dev); + i40evf_dev_allmulticast_disable(dev); + i40evf_reset_vf(hw); + } - i40evf_reset_vf(hw); + rte_eal_alarm_cancel(i40evf_dev_alarm_handler, dev); i40e_shutdown_adminq(hw); i40evf_disable_irq0(hw); - rte_eal_alarm_cancel(i40evf_dev_alarm_handler, dev); + + hw->adapter_stopped = 1; } /* -- 2.17.1
[dpdk-dev] [PATCH v1] net/i40e: fix RX/TX setup when restart port
Before this patch, there are two functions that will clear RX/TX queues number: rte_eth_dev_close() and i40e_dev_free_queues(). But if also i40e_dev_free_queues() clear it, RX/TX queues will not set up correctly when restart port. Fixes: 6b4537128394 ("i40e: free queue memory when closing") Signed-off-by: Zhirun Yan Signed-off-by: Haiyue Wang --- drivers/net/i40e/i40e_rxtx.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c index e1152ff0e..cc953ad58 100644 --- a/drivers/net/i40e/i40e_rxtx.c +++ b/drivers/net/i40e/i40e_rxtx.c @@ -2753,7 +2753,6 @@ i40e_dev_free_queues(struct rte_eth_dev *dev) i40e_dev_rx_queue_release(dev->data->rx_queues[i]); dev->data->rx_queues[i] = NULL; } - dev->data->nb_rx_queues = 0; for (i = 0; i < dev->data->nb_tx_queues; i++) { if (!dev->data->tx_queues[i]) @@ -2761,7 +2760,6 @@ i40e_dev_free_queues(struct rte_eth_dev *dev) i40e_dev_tx_queue_release(dev->data->tx_queues[i]); dev->data->tx_queues[i] = NULL; } - dev->data->nb_tx_queues = 0; } #define I40E_FDIR_NUM_TX_DESC I40E_MIN_RING_DESC -- 2.17.1
[dpdk-dev] [PATCH v1] net/i40e: fix VF port reset
Port reset will call i40evf_uninit_vf() to release resource. It wants to call i40evf_dev_close() to do some clean work. Before this patch, port reset will never call i40evf_dev_close() to shutdown adminq. So the i40evf_dev_init() will failed. Fixs: 18599342024("net/i40e: fix VF add/del MAC") Signed-off-by: Zhirun Yan Signed-off-by: Haiyue Wang --- drivers/net/i40e/i40e_ethdev_vf.c | 28 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/net/i40e/i40e_ethdev_vf.c b/drivers/net/i40e/i40e_ethdev_vf.c index ae55b9b18..cecedc91f 100644 --- a/drivers/net/i40e/i40e_ethdev_vf.c +++ b/drivers/net/i40e/i40e_ethdev_vf.c @@ -1924,15 +1924,12 @@ static int i40evf_dev_start(struct rte_eth_dev *dev) { struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); - struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; uint32_t intr_vector = 0; PMD_INIT_FUNC_TRACE(); - hw->adapter_stopped = 0; - vf->max_pkt_len = dev->data->dev_conf.rxmode.max_rx_pkt_len; vf->num_queue_pairs = RTE_MAX(dev->data->nb_rx_queues, dev->data->nb_tx_queues); @@ -2004,7 +2001,6 @@ i40evf_dev_stop(struct rte_eth_dev *dev) { struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; - struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); PMD_INIT_FUNC_TRACE(); @@ -2012,8 +2008,6 @@ i40evf_dev_stop(struct rte_eth_dev *dev) if (dev->data->dev_conf.intr_conf.rxq != 0) rte_intr_disable(intr_handle); - if (hw->adapter_stopped == 1) - return; i40evf_stop_queues(dev); i40evf_disable_queues_intr(dev); i40e_dev_clear_queues(dev); @@ -2029,7 +2023,6 @@ i40evf_dev_stop(struct rte_eth_dev *dev) /* remove all multicast addresses */ i40evf_add_del_mc_addr_list(dev, vf->mc_addrs, vf->mc_addrs_num, FALSE); - hw->adapter_stopped = 1; } @@ -2245,21 +2238,32 @@ static void i40evf_dev_close(struct rte_eth_dev *dev) { struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); + + if (dev->data->dev_started != 0) { + dev->data->dev_started = 0; + i40evf_dev_stop(dev); + } - i40evf_dev_stop(dev); i40e_dev_free_queues(dev); /* * disable promiscuous mode before reset vf * it is a workaround solution when work with kernel driver * and it is not the normal way +* +* If VF is reset, AQTX: head overrun at 0xdeadbeef > 32 */ - i40evf_dev_promiscuous_disable(dev); - i40evf_dev_allmulticast_disable(dev); + if (!vf->vf_reset) { + i40evf_dev_promiscuous_disable(dev); + i40evf_dev_allmulticast_disable(dev); + i40evf_reset_vf(hw); + } - i40evf_reset_vf(hw); + rte_eal_alarm_cancel(i40evf_dev_alarm_handler, dev); i40e_shutdown_adminq(hw); i40evf_disable_irq0(hw); - rte_eal_alarm_cancel(i40evf_dev_alarm_handler, dev); + + hw->adapter_stopped = 1; } /* -- 2.17.1
[dpdk-dev] [PATCH v1] net/i40e: fix PF port close
Before this patch, the function i40e_dev_close wants to call i40e_dev_stop() to release res, but it will never call it. Fixes: 4861cde4611601ccc9 ("i40e: new poll mode driver") Signed-off-by: Zhirun Yan --- drivers/net/i40e/i40e_ethdev.c | 15 ++- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c index 7030eb1fa..bc5e45095 100644 --- a/drivers/net/i40e/i40e_ethdev.c +++ b/drivers/net/i40e/i40e_ethdev.c @@ -2391,15 +2391,11 @@ static void i40e_dev_stop(struct rte_eth_dev *dev) { struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); - struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct i40e_vsi *main_vsi = pf->main_vsi; struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; int i; - if (hw->adapter_stopped == 1) - return; - if (dev->data->dev_conf.intr_conf.rxq == 0) { rte_eal_alarm_cancel(i40e_dev_alarm_handler, dev); rte_intr_enable(intr_handle); @@ -2442,8 +2438,6 @@ i40e_dev_stop(struct rte_eth_dev *dev) /* reset hierarchy commit */ pf->tm_conf.committed = false; - - hw->adapter_stopped = 1; } static void @@ -2460,7 +2454,10 @@ i40e_dev_close(struct rte_eth_dev *dev) PMD_INIT_FUNC_TRACE(); - i40e_dev_stop(dev); + if (dev->data->dev_started != 0) { + dev->data->dev_started = 0; + i40e_dev_stop(dev); + } /* Remove all mirror rules */ while ((p_mirror = TAILQ_FIRST(&pf->mirror_list))) { @@ -2523,6 +2520,8 @@ i40e_dev_close(struct rte_eth_dev *dev) I40E_WRITE_REG(hw, I40E_PFGEN_CTRL, (reg | I40E_PFGEN_CTRL_PFSWR_MASK)); I40E_WRITE_FLUSH(hw); + + hw->adapter_stopped = 1; } /* @@ -2539,8 +2538,6 @@ i40e_dev_reset(struct rte_eth_dev *dev) * To avoid unexpected behavior in VF, currently reset of PF with * SR-IOV activation is not supported. It might be supported later. */ - if (dev->data->sriov.active) - return -ENOTSUP; ret = eth_i40e_dev_uninit(dev); if (ret) -- 2.17.1
[dpdk-dev] [PATCH] net/mlx5: fix validation of Rx queue number
Function mlx5_ctrl_flow_vlan() is used to set the rss rule in MLX5 PMD, using priv->reta_idx_n as number of Rx queues. This number is passed to mlx5_flow_validate_action_rss(), which attempts to access the Rx queues at priv->rxqs. In case priv->rxqs_n is 0, priv->rxqs is empty, and mlx5_flow_validate_action_rss() will crash with segmentation fault. priv->reta_idx_n can never be 0, even if priv->rxqs_n is set to 0. But when priv->rxqs_n is set to 0, setting the rss rule is invalid. This patch updates mlx5_ctrl_flow_vlan(), if priv->rxqs_n is 0 the function will fail with EINVAL errno. Fixes: 8086cf08b2f0 ("net/mlx5: handle RSS hash configuration in RSS flow") Cc: nelio.laranje...@6wind.com Signed-off-by: Dekel Peled --- drivers/net/mlx5/mlx5_flow.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c index 97dc3e1..ee129b9 100644 --- a/drivers/net/mlx5/mlx5_flow.c +++ b/drivers/net/mlx5/mlx5_flow.c @@ -2314,7 +2314,7 @@ struct rte_flow * struct rte_flow_error error; unsigned int i; - if (!priv->reta_idx_n) { + if (!priv->reta_idx_n || !priv->rxqs_n) { rte_errno = EINVAL; return -rte_errno; } -- 1.8.3.1
[dpdk-dev] [PATCH v2 0/2] Support request more queues
DPDK VF send VIRTCHNL_OP_REQUEST_QUEUES to kernel PF or DPDK VF for requesting more queues, then PF will allocate more queues. Zhirun Yan (2): net/i40e: support VF request more queues net/i40e: support PF respond VF request more queues drivers/net/i40e/i40e_ethdev_vf.c | 59 +-- drivers/net/i40e/i40e_pf.c| 67 +++ 2 files changed, 122 insertions(+), 4 deletions(-) -- 2.17.1
[dpdk-dev] [PATCH v2 2/2] net/i40e: support PF respond VF request more queues
This patch respond the VIRTCHNL_OP_REQUEST_QUEUES msg from VF, and process to allocated more queues for the requested VF. If successful, PF will notify VF to reset. If unsuccessful, PF will send message to inform VF. Signed-off-by: Zhirun Yan Signed-off-by: Haiyue Wang --- drivers/net/i40e/i40e_pf.c | 67 ++ 1 file changed, 67 insertions(+) diff --git a/drivers/net/i40e/i40e_pf.c b/drivers/net/i40e/i40e_pf.c index dd3962d38..789a98dc7 100644 --- a/drivers/net/i40e/i40e_pf.c +++ b/drivers/net/i40e/i40e_pf.c @@ -1218,6 +1218,68 @@ i40e_notify_vf_link_status(struct rte_eth_dev *dev, struct i40e_pf_vf *vf) I40E_SUCCESS, (uint8_t *)&event, sizeof(event)); } +/** + * i40e_vc_notify_vf_reset + * @vf: pointer to the VF structure + * + * indicate a pending reset to the given VF + **/ +static void +i40e_vc_notify_vf_reset(struct i40e_pf_vf *vf) +{ + struct i40e_hw *hw = I40E_PF_TO_HW(vf->pf); + struct virtchnl_pf_event pfe; + int abs_vf_id; + uint16_t vf_id = vf->vf_idx; + + abs_vf_id = vf_id + hw->func_caps.vf_base_id; + pfe.event = VIRTCHNL_EVENT_RESET_IMPENDING; + pfe.severity = PF_EVENT_SEVERITY_CERTAIN_DOOM; + i40e_aq_send_msg_to_vf(hw, abs_vf_id, VIRTCHNL_OP_EVENT, + 0, (u8 *)&pfe, + sizeof(struct virtchnl_pf_event), NULL); +} + +static int +i40e_pf_host_process_cmd_request_queues(struct i40e_pf_vf *vf, + uint8_t *msg) +{ + struct virtchnl_vf_res_request *vfres = + (struct virtchnl_vf_res_request *)msg; + struct i40e_pf *pf; + uint32_t req_pairs = vfres->num_queue_pairs; + uint32_t cur_pairs = vf->vsi->nb_used_qps; + + pf = vf->pf; + + if (req_pairs <= 0) { + PMD_DRV_LOG(ERR, "VF %d tried to request %d queues. Ignoring.\n", + vf->vf_idx, + I40E_MAX_QP_NUM_PER_VF); + } else if (req_pairs > I40E_MAX_QP_NUM_PER_VF) { + PMD_DRV_LOG(ERR, "VF %d tried to request more than %d queues.\n", + vf->vf_idx, + I40E_MAX_QP_NUM_PER_VF); + vfres->num_queue_pairs = I40E_MAX_QP_NUM_PER_VF; + } else if (req_pairs > cur_pairs + pf->qp_pool.num_free) { + PMD_DRV_LOG(ERR, "VF %d requested %d more queues, but noly %d left\n", + vf->vf_idx, + req_pairs - cur_pairs, + pf->qp_pool.num_free); + vfres->num_queue_pairs = pf->qp_pool.num_free + cur_pairs; + } else { + i40e_vc_notify_vf_reset(vf); + vf->vsi->nb_qps = req_pairs; + pf->vf_nb_qps = req_pairs; + i40e_pf_host_process_cmd_reset_vf(vf); + + return 0; + } + + return i40e_pf_host_send_msg_to_vf(vf, VIRTCHNL_OP_REQUEST_QUEUES, 0, + (u8 *)vfres, sizeof(*vfres)); +} + void i40e_pf_host_handle_vf_msg(struct rte_eth_dev *dev, uint16_t abs_vf_id, uint32_t opcode, @@ -1351,6 +1413,11 @@ i40e_pf_host_handle_vf_msg(struct rte_eth_dev *dev, PMD_DRV_LOG(INFO, "OP_CONFIG_RSS_KEY received"); i40e_pf_host_process_cmd_set_rss_key(vf, msg, msglen, b_op); break; + case VIRTCHNL_OP_REQUEST_QUEUES: + PMD_DRV_LOG(INFO, "OP_REQUEST_QUEUES received"); + i40e_pf_host_process_cmd_request_queues(vf, msg); + break; + /* Don't add command supported below, which will * return an error code. */ -- 2.17.1
[dpdk-dev] [PATCH v2 1/2] net/i40e: support VF request more queues
Before this patch, VF gets a default number of queues from the PF. This patch enables VF to request a different number. When VF configures more queues, it will send VIRTCHNL_OP_REQUEST_QUEUES to PF to request more queues, if success, PF will reset the VF. User can run "port stop all", "port config port_id rxq/txq queue_num" and "port start all" to reconfigure queue number. Signed-off-by: Zhirun Yan Signed-off-by: Haiyue Wang --- drivers/net/i40e/i40e_ethdev_vf.c | 59 --- 1 file changed, 55 insertions(+), 4 deletions(-) diff --git a/drivers/net/i40e/i40e_ethdev_vf.c b/drivers/net/i40e/i40e_ethdev_vf.c index cecedc91f..551448bdb 100644 --- a/drivers/net/i40e/i40e_ethdev_vf.c +++ b/drivers/net/i40e/i40e_ethdev_vf.c @@ -343,6 +343,7 @@ i40evf_execute_vf_cmd(struct rte_eth_dev *dev, struct vf_cmd_info *args) err = 0; break; case VIRTCHNL_OP_VERSION: + case VIRTCHNL_OP_REQUEST_QUEUES: case VIRTCHNL_OP_GET_VF_RESOURCES: /* for init adminq commands, need to poll the response */ err = -1; @@ -350,8 +351,15 @@ i40evf_execute_vf_cmd(struct rte_eth_dev *dev, struct vf_cmd_info *args) ret = i40evf_read_pfmsg(dev, &info); vf->cmd_retval = info.result; if (ret == I40EVF_MSG_CMD) { - err = 0; + if (info.ops != VIRTCHNL_OP_REQUEST_QUEUES) + err = 0; break; + } else if (ret == I40EVF_MSG_SYS) { + if (args->ops == VIRTCHNL_OP_REQUEST_QUEUES && + vf->vf_reset) { + err = 0; + break; + } } else if (ret == I40EVF_MSG_ERR) break; rte_delay_ms(ASQ_DELAY_MS); @@ -1012,6 +1020,28 @@ i40evf_add_vlan(struct rte_eth_dev *dev, uint16_t vlanid) return err; } +static int +i40evf_request_queues(struct rte_eth_dev *dev, uint16_t num) +{ + struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); + struct virtchnl_vf_res_request vfres; + struct vf_cmd_info args; + int err; + + vfres.num_queue_pairs = num; + + args.ops = VIRTCHNL_OP_REQUEST_QUEUES; + args.in_args = (u8 *)&vfres; + args.in_args_size = sizeof(vfres); + args.out_buffer = vf->aq_resp; + args.out_size = I40E_AQ_BUF_SZ; + err = i40evf_execute_vf_cmd(dev, &args); + if (err) + PMD_DRV_LOG(ERR, "fail to execute command OP_REQUEST_QUEUES"); + + return err; +} + static int i40evf_del_vlan(struct rte_eth_dev *dev, uint16_t vlanid) { @@ -1515,8 +1545,12 @@ RTE_PMD_REGISTER_KMOD_DEP(net_i40e_vf, "* igb_uio | vfio-pci"); static int i40evf_dev_configure(struct rte_eth_dev *dev) { + struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); struct i40e_adapter *ad = I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private); + uint16_t num_queue_pairs = RTE_MAX(dev->data->nb_rx_queues, + dev->data->nb_tx_queues); + int ret = 0; /* Initialize to TRUE. If any of Rx queues doesn't meet the bulk * allocation or vector Rx preconditions we will reset it. @@ -1526,7 +1560,24 @@ i40evf_dev_configure(struct rte_eth_dev *dev) ad->tx_simple_allowed = true; ad->tx_vec_allowed = true; - return i40evf_init_vlan(dev); + if (num_queue_pairs != vf->vsi_res->num_queue_pairs) { + PMD_DRV_LOG(INFO, "change queue pairs from %u to %u", + vf->vsi_res->num_queue_pairs, num_queue_pairs); + ret = i40evf_request_queues(dev, num_queue_pairs); + if (ret != 0) + return ret; + + ret = i40evf_dev_reset(dev); + vf->vf_reset = false; + vf->pend_msg &= ~PFMSG_RESET_IMPENDING; + + if (ret != 0) + return ret; + } + + i40evf_init_vlan(dev); + + return ret; } static int @@ -2137,8 +2188,8 @@ i40evf_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) { struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); - dev_info->max_rx_queues = vf->vsi_res->num_queue_pairs; - dev_info->max_tx_queues = vf->vsi_res->num_queue_pairs; + dev_info->max_rx_queues = I40E_MAX_QP_NUM_PER_VF; + dev_info->max_tx_queues = I40E_MAX_QP_NUM_PER_VF; dev_info->min_rx_bufsize = I40E_BUF_SIZE_MIN; dev_info->max_rx_pktlen = I40E_FRAME_SIZE_MAX; dev_info->hash_key_size = (I40E_VFQF_HKEY_MAX_INDEX + 1) * sizeof(uint32_t); -- 2.17.1
Re: [dpdk-dev] [PATCH 3/3] lib/librte_meter: update abi to include new rfc4115 function
On 28 Nov 2018, at 9:38, David Marchand wrote: Hello Eelco, On Tue, Nov 27, 2018 at 4:22 PM Eelco Chaudron wrote: Update the ABI to include the new RFC4115 meter functions --- lib/librte_meter/Makefile |2 +- lib/librte_meter/meson.build |2 +- lib/librte_meter/rte_meter_version.map |9 + 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/lib/librte_meter/Makefile b/lib/librte_meter/Makefile index 2dc071e8e..79ad79744 100644 --- a/lib/librte_meter/Makefile +++ b/lib/librte_meter/Makefile @@ -16,7 +16,7 @@ LDLIBS += -lrte_eal EXPORT_MAP := rte_meter_version.map -LIBABIVER := 2 +LIBABIVER := 3 # # all source are stored in SRCS-y As far as I understood the policy around the EXPERIMENTAL section, you don't need to bump the library version. Thought I would add the new function as none experimental, i.e. next version, but checkpatch did not allow me to do this. Tried to find info on what the right process was, as these functions are just another meter implementation using similar existing APIs. If anyone has any background on this please point me to it. I changed the library version as an existing data structure changed (which in theory should not change the location of the data), but the ABI check popped warnings so I decided to increase the version. + you should squash this into the first patch. I’ll do this on the next version. Thanks, Eelco
Re: [dpdk-dev] [v1] net/i40e: fix RX/TX setup when restart port
On 28.11.2018 19:51, Zhirun Yan wrote: > Before this patch, there are two functions that will clear RX/TX queues > number: rte_eth_dev_close() and i40e_dev_free_queues(). But if also > i40e_dev_free_queues() clear it, RX/TX queues will not set up correctly > when restart port. According to DPDK API device could not be restarted after rte_eth_dev_close: "Close a stopped Ethernet device. The device cannot be restarted!" http://doc.dpdk.org/api/rte__ethdev_8h.html#a93eeb672a2f9cd18e338aad10c77687c You should not close the device if you're willing to use it later. If you really want to close it, you'll need to detach it and attach back when needed. > > Fixes: 6b4537128394 ("i40e: free queue memory when closing") > > Signed-off-by: Zhirun Yan > Signed-off-by: Haiyue Wang > --- > drivers/net/i40e/i40e_rxtx.c | 2 -- > 1 file changed, 2 deletions(-) > > diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c > index e1152ff0e..cc953ad58 100644 > --- a/drivers/net/i40e/i40e_rxtx.c > +++ b/drivers/net/i40e/i40e_rxtx.c > @@ -2753,7 +2753,6 @@ i40e_dev_free_queues(struct rte_eth_dev *dev) > i40e_dev_rx_queue_release(dev->data->rx_queues[i]); > dev->data->rx_queues[i] = NULL; > } > - dev->data->nb_rx_queues = 0; > > for (i = 0; i < dev->data->nb_tx_queues; i++) { > if (!dev->data->tx_queues[i]) > @@ -2761,7 +2760,6 @@ i40e_dev_free_queues(struct rte_eth_dev *dev) > i40e_dev_tx_queue_release(dev->data->tx_queues[i]); > dev->data->tx_queues[i] = NULL; > } > - dev->data->nb_tx_queues = 0; > } > > #define I40E_FDIR_NUM_TX_DESC I40E_MIN_RING_DESC >
[dpdk-dev] [PATCH] vhost: batch used descriptors chains write-back with packed ring
Instead of writing back descriptors chains in order, let's write the first chain flags last in order to improve batching. With Kernel's pktgen benchmark, ~3% performance gain is measured. Signed-off-by: Maxime Coquelin --- lib/librte_vhost/virtio_net.c | 37 ++- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c index 5e1a1a727..f54642c2d 100644 --- a/lib/librte_vhost/virtio_net.c +++ b/lib/librte_vhost/virtio_net.c @@ -135,19 +135,10 @@ flush_shadow_used_ring_packed(struct virtio_net *dev, struct vhost_virtqueue *vq) { int i; - uint16_t used_idx = vq->last_used_idx; + uint16_t head_flags, head_idx = vq->last_used_idx; - /* Split loop in two to save memory barriers */ - for (i = 0; i < vq->shadow_used_idx; i++) { - vq->desc_packed[used_idx].id = vq->shadow_used_packed[i].id; - vq->desc_packed[used_idx].len = vq->shadow_used_packed[i].len; - - used_idx += vq->shadow_used_packed[i].count; - if (used_idx >= vq->size) - used_idx -= vq->size; - } - - rte_smp_wmb(); + if (unlikely(vq->shadow_used_idx == 0)) + return; for (i = 0; i < vq->shadow_used_idx; i++) { uint16_t flags; @@ -165,12 +156,22 @@ flush_shadow_used_ring_packed(struct virtio_net *dev, flags &= ~VRING_DESC_F_AVAIL; } - vq->desc_packed[vq->last_used_idx].flags = flags; + vq->desc_packed[vq->last_used_idx].id = + vq->shadow_used_packed[i].id; + vq->desc_packed[vq->last_used_idx].len = + vq->shadow_used_packed[i].len; + + if (i > 0) { + vq->desc_packed[vq->last_used_idx].flags = flags; - vhost_log_cache_used_vring(dev, vq, + vhost_log_cache_used_vring(dev, vq, vq->last_used_idx * sizeof(struct vring_packed_desc), sizeof(struct vring_packed_desc)); + } else { + head_idx = vq->last_used_idx; + head_flags = flags; + } vq->last_used_idx += vq->shadow_used_packed[i].count; if (vq->last_used_idx >= vq->size) { @@ -180,7 +181,15 @@ flush_shadow_used_ring_packed(struct virtio_net *dev, } rte_smp_wmb(); + + vq->desc_packed[head_idx].flags = head_flags; vq->shadow_used_idx = 0; + + vhost_log_cache_used_vring(dev, vq, + head_idx * + sizeof(struct vring_packed_desc), + sizeof(struct vring_packed_desc)); + vhost_log_cache_sync(dev, vq); } -- 2.17.2
[dpdk-dev] [RFC PATCH 2/3] mlx5: Implement support for read_clock
Signed-off-by: Tom Barbette --- drivers/net/mlx5/mlx5.c| 1 + drivers/net/mlx5/mlx5.h| 1 + drivers/net/mlx5/mlx5_ethdev.c | 31 +++ drivers/net/mlx5/mlx5_glue.c | 8 drivers/net/mlx5/mlx5_glue.h | 2 ++ 5 files changed, 43 insertions(+) diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index 9e5cab169..ed799ab5a 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -372,6 +372,7 @@ const struct eth_dev_ops mlx5_dev_ops = { .xstats_reset = mlx5_xstats_reset, .xstats_get_names = mlx5_xstats_get_names, .dev_infos_get = mlx5_dev_infos_get, + .read_clock = mlx5_read_clock, .dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get, .vlan_filter_set = mlx5_vlan_filter_set, .rx_queue_setup = mlx5_rx_queue_setup, diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index bc500b2bc..3972debf5 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -258,6 +258,7 @@ int mlx5_set_flags(struct rte_eth_dev *dev, unsigned int keep, unsigned int flags); int mlx5_dev_configure(struct rte_eth_dev *dev); void mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info); +int mlx5_read_clock(struct rte_eth_dev *dev, uint64_t *time); const uint32_t *mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev); int mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete); int mlx5_force_link_status_change(struct rte_eth_dev *dev, int status); diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c index d178ed6a1..eb97ab27e 100644 --- a/drivers/net/mlx5/mlx5_ethdev.c +++ b/drivers/net/mlx5/mlx5_ethdev.c @@ -557,6 +557,37 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) } } +/** + * Get device current raw clock counter + * + * @param dev + * Pointer to Ethernet device structure. + * + * @param[out] time + * Current raw clock counter of the device. + * + * @return + * 0 if the clock has correctly been read + */ +int +mlx5_read_clock(struct rte_eth_dev *dev, uint64_t *clock) +{ + struct priv *priv = dev->data->dev_private; + struct ibv_values_ex values; + int err = 0; + + values.comp_mask = IBV_VALUES_MASK_RAW_CLOCK; + err = mlx5_glue->query_rt_values_ex(priv->ctx, &values); + if (err != 0) { + DRV_LOG(WARNING, "Could not query the clock !"); + return err; + } + + *clock = values.raw_clock.tv_nsec; + return 0; +} + + /** * Get supported packet types. * diff --git a/drivers/net/mlx5/mlx5_glue.c b/drivers/net/mlx5/mlx5_glue.c index dd10ad6de..e23296519 100644 --- a/drivers/net/mlx5/mlx5_glue.c +++ b/drivers/net/mlx5/mlx5_glue.c @@ -86,6 +86,13 @@ mlx5_glue_query_device_ex(struct ibv_context *context, return ibv_query_device_ex(context, input, attr); } +static int +mlx5_glue_query_rt_values_ex(struct ibv_context *context, + struct ibv_values_ex *values) +{ + return ibv_query_rt_values_ex(context, values); +} + static int mlx5_glue_query_port(struct ibv_context *context, uint8_t port_num, struct ibv_port_attr *port_attr) @@ -491,6 +498,7 @@ const struct mlx5_glue *mlx5_glue = &(const struct mlx5_glue){ .close_device = mlx5_glue_close_device, .query_device = mlx5_glue_query_device, .query_device_ex = mlx5_glue_query_device_ex, + .query_rt_values_ex = mlx5_glue_query_rt_values_ex, .query_port = mlx5_glue_query_port, .create_comp_channel = mlx5_glue_create_comp_channel, .destroy_comp_channel = mlx5_glue_destroy_comp_channel, diff --git a/drivers/net/mlx5/mlx5_glue.h b/drivers/net/mlx5/mlx5_glue.h index 2d92ba8bc..31ebee72a 100644 --- a/drivers/net/mlx5/mlx5_glue.h +++ b/drivers/net/mlx5/mlx5_glue.h @@ -70,6 +70,8 @@ struct mlx5_glue { int (*query_device_ex)(struct ibv_context *context, const struct ibv_query_device_ex_input *input, struct ibv_device_attr_ex *attr); + int (*query_rt_values_ex)(struct ibv_context *context, + struct ibv_values_ex *values); int (*query_port)(struct ibv_context *context, uint8_t port_num, struct ibv_port_attr *port_attr); struct ibv_comp_channel *(*create_comp_channel) -- 2.17.1
[dpdk-dev] [RFC PATCH 1/3] rte_ethdev: Add API function to read dev clock
Add rte_eth_read_clock to read the current clock of a devide. The main use is to get the current clock as written by the driver in the timestamp field of the pkt mbuf when timestamp offloading is enabled. This function was missing to allow users to convert that RX timestamp field to real time without the complexity of the rte_timesync* facility. One can derivate the clock frequency by calling twice read_clock and then keep a common time base. Signed-off-by: Tom Barbette --- doc/guides/nics/features.rst | 1 + lib/librte_ethdev/rte_ethdev.c | 13 + lib/librte_ethdev/rte_ethdev.h | 23 +++ lib/librte_ethdev/rte_ethdev_core.h | 6 ++ lib/librte_ethdev/rte_ethdev_version.map | 1 + lib/librte_mbuf/rte_mbuf.h | 2 ++ 6 files changed, 46 insertions(+) diff --git a/doc/guides/nics/features.rst b/doc/guides/nics/features.rst index d3f904839..45484a30f 100644 --- a/doc/guides/nics/features.rst +++ b/doc/guides/nics/features.rst @@ -602,6 +602,7 @@ Supports Timestamp. * **[provides] mbuf**: ``mbuf.ol_flags:PKT_RX_TIMESTAMP``. * **[provides] mbuf**: ``mbuf.timestamp``. * **[provides] rte_eth_dev_info**: ``rx_offload_capa,rx_queue_offload_capa: DEV_RX_OFFLOAD_TIMESTAMP``. +* **[implements] eth_dev_ops**: ``read_clock``. .. _nic_features_macsec_offload: diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c index 5f858174b..48e8218b2 100644 --- a/lib/librte_ethdev/rte_ethdev.c +++ b/lib/librte_ethdev/rte_ethdev.c @@ -4124,6 +4124,19 @@ rte_eth_timesync_write_time(uint16_t port_id, const struct timespec *timestamp) timestamp)); } +int +rte_eth_read_clock(uint16_t port_id, uint64_t *timestamp) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->read_clock, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->read_clock)(dev, + timestamp)); +} + int rte_eth_dev_get_reg_info(uint16_t port_id, struct rte_dev_reg_info *info) { diff --git a/lib/librte_ethdev/rte_ethdev.h b/lib/librte_ethdev/rte_ethdev.h index 1960f3a2d..6e3a48308 100644 --- a/lib/librte_ethdev/rte_ethdev.h +++ b/lib/librte_ethdev/rte_ethdev.h @@ -3642,6 +3642,29 @@ int rte_eth_timesync_read_time(uint16_t port_id, struct timespec *time); */ int rte_eth_timesync_write_time(uint16_t port_id, const struct timespec *time); +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * Read the current clock counter of an Ethernet device + * + * This returns the current raw clock value of an Ethernet device. + * The value returned here is from the same clock than the one + * filling timestamp field of RX packets. Therefore it can be used + * to compute a precise conversion of the device clock to the real time. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param time + * Pointer to the uint64_t that holds the raw clock value. + * + * @return + * - 0: Success. + * - -ENODEV: The port ID is invalid. + * - -ENOTSUP: The function is not supported by the Ethernet driver. + */ +int __rte_experimental rte_eth_read_clock(uint16_t port_id, uint64_t *time); + /** * Config l2 tunnel ether type of an Ethernet device for filtering specific * tunnel packets by ether type. diff --git a/lib/librte_ethdev/rte_ethdev_core.h b/lib/librte_ethdev/rte_ethdev_core.h index 8f03f83f6..86806b3eb 100644 --- a/lib/librte_ethdev/rte_ethdev_core.h +++ b/lib/librte_ethdev/rte_ethdev_core.h @@ -322,6 +322,10 @@ typedef int (*eth_timesync_write_time)(struct rte_eth_dev *dev, const struct timespec *timestamp); /**< @internal Function used to get time from the device clock */ +typedef int (*eth_read_clock)(struct rte_eth_dev *dev, + uint64_t *timestamp); +/**< @internal Function used to get the current value of the device clock. */ + typedef int (*eth_get_reg_t)(struct rte_eth_dev *dev, struct rte_dev_reg_info *info); /**< @internal Retrieve registers */ @@ -496,6 +500,8 @@ struct eth_dev_ops { eth_timesync_read_time timesync_read_time; /** Get the device clock time. */ eth_timesync_write_timetimesync_write_time; /** Set the device clock time. */ + eth_read_clock read_clock; + eth_xstats_get_by_id_t xstats_get_by_id; /**< Get extended device statistic values by ID. */ eth_xstats_get_names_by_id_t xstats_get_names_by_id; diff --git a/lib/librte_ethdev/rte_ethdev_version.map b/lib/librte_ethdev/rte_ethdev_version.map index 92ac3de25..12d6c3c1d 100644 --- a/lib/librte_ethdev/rte_ethdev_version.map +++ b/lib/librte_ethdev/rte_ethdev_version.map @@
[dpdk-dev] [RFC PATCH 0/3] Add rte_eth_read_clock API
Some NICs allows to timestamp packets, but do not support the full PTP synchronization process. Hence, the value set in the mbuf timestamp field is only the raw value of an internal clock. To make sense of this value, one at least needs to be able to query the current hardware clock value. As with the TSC, from there a frequency can be derieved by querying multiple time the current value of the internal clock with some known delay between the queries. This RFC patch series adds support for MLX5. An example is provided in the rxtx_callback application. It has been updated to display, on top of the software latency in cycles, the total latency since the packet was received in hardware. The API is used to compute a delta in the TX callback. The raw amount of ticks is converted to cycles using the technique describe above. Aside from offloading timestamping, which relieve the software from a few operations, this allows to get much more precision when studying the source of the latency in a system. Eg. in our 100G, CX5 setup the rxtx callback application shows SW latency is around 74 cycles (TSC is 3.2Ghz), but the latency including NIC processing, PCIe, and queuing is around 196 cycles. The RFC lacks the documentation update for the sample application. I wanted further validation before doing so. I'm not sure if it's fine to update an example to show a "double feature". At the same time a full app for this would be overkill, and the information nicely complements the one in rxtx_callback. Tom Barbette (3): rte_ethdev: Add API function to read dev clock mlx5: Implement support for read_clock rxtx_callbacks: Add support for HW timestamp doc/guides/nics/features.rst | 1 + drivers/net/mlx5/mlx5.c | 1 + drivers/net/mlx5/mlx5.h | 1 + drivers/net/mlx5/mlx5_ethdev.c | 31 + drivers/net/mlx5/mlx5_glue.c | 8 +++ drivers/net/mlx5/mlx5_glue.h | 2 + examples/rxtx_callbacks/Makefile | 2 + examples/rxtx_callbacks/main.c | 87 ++-- examples/rxtx_callbacks/meson.build | 1 + lib/librte_ethdev/rte_ethdev.c | 13 lib/librte_ethdev/rte_ethdev.h | 23 +++ lib/librte_ethdev/rte_ethdev_core.h | 6 ++ lib/librte_ethdev/rte_ethdev_version.map | 1 + lib/librte_mbuf/rte_mbuf.h | 2 + 14 files changed, 175 insertions(+), 4 deletions(-) -- 2.17.1
[dpdk-dev] [RFC PATCH 3/3] rxtx_callbacks: Add support for HW timestamp
Use rxtx callback to demonstrate a way to use rte_eth_read_clock to convert the hardware timestamps to an amount of cycles. This allows to get the amount of time the packet spent since its entry in the device. While the regular latency only shows the latency from when it entered the software stack. Signed-off-by: Tom Barbette --- examples/rxtx_callbacks/Makefile| 2 + examples/rxtx_callbacks/main.c | 87 +++-- examples/rxtx_callbacks/meson.build | 1 + 3 files changed, 86 insertions(+), 4 deletions(-) diff --git a/examples/rxtx_callbacks/Makefile b/examples/rxtx_callbacks/Makefile index e9d30d56f..e70c081f2 100644 --- a/examples/rxtx_callbacks/Makefile +++ b/examples/rxtx_callbacks/Makefile @@ -50,6 +50,8 @@ include $(RTE_SDK)/mk/rte.vars.mk CFLAGS += $(WERROR_FLAGS) + +CFLAGS += -DALLOW_EXPERIMENTAL_API # workaround for a gcc bug with noreturn attribute # http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603 ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) diff --git a/examples/rxtx_callbacks/main.c b/examples/rxtx_callbacks/main.c index 2058be627..cd4e86f0f 100644 --- a/examples/rxtx_callbacks/main.c +++ b/examples/rxtx_callbacks/main.c @@ -10,6 +10,8 @@ #include #include +#include + #define RX_RING_SIZE 1024 #define TX_RING_SIZE 1024 @@ -17,6 +19,9 @@ #define MBUF_CACHE_SIZE 250 #define BURST_SIZE 32 +static const char usage[] = + "%s EAL_ARGS -- [-t]\n"; + static const struct rte_eth_conf port_conf_default = { .rxmode = { .max_rx_pkt_len = ETHER_MAX_LEN, @@ -25,9 +30,14 @@ static const struct rte_eth_conf port_conf_default = { static struct { uint64_t total_cycles; + uint64_t total_queue_cycles; uint64_t total_pkts; } latency_numbers; +int hw_timestamping; + +#define TICKS_PER_CYCLE_SHIFT 16 +uint64_t ticks_per_cycle_mult; static uint16_t add_timestamps(uint16_t port __rte_unused, uint16_t qidx __rte_unused, @@ -43,22 +53,42 @@ add_timestamps(uint16_t port __rte_unused, uint16_t qidx __rte_unused, } static uint16_t -calc_latency(uint16_t port __rte_unused, uint16_t qidx __rte_unused, +calc_latency(uint16_t port, uint16_t qidx __rte_unused, struct rte_mbuf **pkts, uint16_t nb_pkts, void *_ __rte_unused) { uint64_t cycles = 0; + uint64_t queue_ticks = 0; uint64_t now = rte_rdtsc(); + uint64_t ticks; unsigned i; - for (i = 0; i < nb_pkts; i++) + if (hw_timestamping) + rte_eth_read_clock(port, &ticks); + + for (i = 0; i < nb_pkts; i++) { cycles += now - pkts[i]->udata64; + if (hw_timestamping) + queue_ticks += ticks - pkts[i]->timestamp; + } + latency_numbers.total_cycles += cycles; + if (hw_timestamping) + latency_numbers.total_queue_cycles += (queue_ticks + * ticks_per_cycle_mult) >> TICKS_PER_CYCLE_SHIFT; + latency_numbers.total_pkts += nb_pkts; if (latency_numbers.total_pkts > (100 * 1000 * 1000ULL)) { printf("Latency = %"PRIu64" cycles\n", latency_numbers.total_cycles / latency_numbers.total_pkts); - latency_numbers.total_cycles = latency_numbers.total_pkts = 0; + if (hw_timestamping) { + printf("Latency from HW = %"PRIu64" cycles\n", + latency_numbers.total_queue_cycles + / latency_numbers.total_pkts); + } + latency_numbers.total_cycles = 0; + latency_numbers.total_queue_cycles = 0; + latency_numbers.total_pkts = 0; } return nb_pkts; } @@ -77,6 +107,7 @@ port_init(uint16_t port, struct rte_mempool *mbuf_pool) int retval; uint16_t q; struct rte_eth_dev_info dev_info; + struct rte_eth_rxconf rxconf; struct rte_eth_txconf txconf; if (!rte_eth_dev_is_valid_port(port)) @@ -95,9 +126,20 @@ port_init(uint16_t port, struct rte_mempool *mbuf_pool) if (retval != 0) return retval; + rxconf = dev_info.default_rxconf; + + if (hw_timestamping) { + if (!(dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TIMESTAMP)) { + printf("\nERROR: Port %u does not support hardware timestamping\n" + , port); + return -1; + } + rxconf.offloads |= DEV_RX_OFFLOAD_TIMESTAMP; + } + for (q = 0; q < rx_rings; q++) { retval = rte_eth_rx_queue_setup(port, q, nb_rxd, - rte_eth_dev_socket_id(port), NULL, mbuf_pool); + rte_eth_dev_socket_id(port), &rxconf, mbuf_pool); if (retval < 0) return retval; } @@ -115,6 +157,25 @@ port_init(uint16_t port, struct rte_mempool *mbuf_pool) if (re
[dpdk-dev] [PATCH 0/9] support SW assisted VDPA live migration
In the previous VDPA implementation we have enabled live migration support by HW accelerator doing all the stuff, including dirty page logging and device status report/restore. In this mode VDPA sample daemon and device driver just takes care of the control path and does not involve in data path, so there's almost 0 CPU resource usage. This mode requires device to have dirty page logging capability. This patch series adds live migration support for devices without logging capability. VDPA driver could set up a relay thread standing between the guest and device when live migration happens, this relay intervenes into the communication between guest virtio driver and physical virtio accelerator, it helps device to do a vring relay and passingly log dirty pages. Thus some CPU resource will be consumed in this scenario, percentage depending on the network throughput. Some new helpers are added into vhost lib for this VDPA SW fallback: - rte_vhost_host_notifier_ctrl, to enable/disable the VDPA direct-IO datapath. - rte_vdpa_relay_avail_ring, to relay the available ring from guest vring to mediate vring. - rte_vdpa_relay_used_ring, to relay the used ring from mediate vring to guest vring. Some existing helpers are also leveraged for SW fallback setup, like VFIO interrupt configuration, IOMMU table programming, etc. This patch enables this SW assisted VDPA live migration in ifc driver. Since ifcvf also supports HW dirty page logging, we add a new devarg for user to select if the SW mode is used or not. Xiao Wang (9): vhost: provide helper for host notifier ctrl vhost: provide helpers for virtio ring relay net/ifc: dump debug message for error net/ifc: store only registered device instance net/ifc: detect if VDPA mode is specified net/ifc: add devarg for LM mode net/ifc: use lib API for used ring logging net/ifc: support SW assisted VDPA live migration doc: update ifc NIC document doc/guides/nics/ifc.rst| 7 + drivers/net/ifc/base/ifcvf.h | 1 + drivers/net/ifc/ifcvf_vdpa.c | 463 ++--- lib/librte_vhost/rte_vdpa.h| 56 lib/librte_vhost/rte_vhost_version.map | 3 + lib/librte_vhost/vdpa.c| 173 lib/librte_vhost/vhost.c | 3 +- lib/librte_vhost/vhost.h | 40 +++ lib/librte_vhost/vhost_user.c | 7 +- lib/librte_vhost/virtio_net.c | 39 --- 10 files changed, 714 insertions(+), 78 deletions(-) -- 2.15.1
[dpdk-dev] [PATCH 2/9] vhost: provide helpers for virtio ring relay
This patch provides two helpers for vdpa device driver to perform a relay between the guest virtio ring and a mediate virtio ring. The available ring relay will synchronize the available entries, and helps to do desc validity checking. The used ring relay will synchronize the used entries from mediate ring to guest ring, and helps to do dirty page logging for live migration. The next patch will leverage these two helpers. Signed-off-by: Xiao Wang --- lib/librte_vhost/rte_vdpa.h| 38 lib/librte_vhost/rte_vhost_version.map | 2 + lib/librte_vhost/vdpa.c| 173 + lib/librte_vhost/vhost.h | 40 lib/librte_vhost/virtio_net.c | 39 5 files changed, 253 insertions(+), 39 deletions(-) diff --git a/lib/librte_vhost/rte_vdpa.h b/lib/librte_vhost/rte_vdpa.h index 89c5bb6b3..0c44b9080 100644 --- a/lib/librte_vhost/rte_vdpa.h +++ b/lib/librte_vhost/rte_vdpa.h @@ -173,4 +173,42 @@ rte_vdpa_get_device_num(void); */ int __rte_experimental rte_vhost_host_notifier_ctrl(int vid, bool enable); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Synchronize the available ring from guest to mediate ring, help to + * check desc validity to protect against malicious guest driver. + * + * @param vid + * vhost device id + * @param qid + * vhost queue id + * @param m_vring + * mediate virtio ring pointer + * @return + * number of synced available entries on success, -1 on failure + */ +int __rte_experimental +rte_vdpa_relay_avail_ring(int vid, int qid, struct vring *m_vring); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Synchronize the used ring from mediate ring to guest, log dirty + * page for each Rx buffer used. + * + * @param vid + * vhost device id + * @param qid + * vhost queue id + * @param m_vring + * mediate virtio ring pointer + * @return + * number of synced used entries on success, -1 on failure + */ +int __rte_experimental +rte_vdpa_relay_used_ring(int vid, int qid, struct vring *m_vring); #endif /* _RTE_VDPA_H_ */ diff --git a/lib/librte_vhost/rte_vhost_version.map b/lib/librte_vhost/rte_vhost_version.map index 22302e972..0ad0fbea2 100644 --- a/lib/librte_vhost/rte_vhost_version.map +++ b/lib/librte_vhost/rte_vhost_version.map @@ -84,4 +84,6 @@ EXPERIMENTAL { rte_vhost_crypto_set_zero_copy; rte_vhost_va_from_guest_pa; rte_vhost_host_notifier_ctrl; + rte_vdpa_relay_avail_ring; + rte_vdpa_relay_used_ring; }; diff --git a/lib/librte_vhost/vdpa.c b/lib/librte_vhost/vdpa.c index e7d849ee0..e41117776 100644 --- a/lib/librte_vhost/vdpa.c +++ b/lib/librte_vhost/vdpa.c @@ -122,3 +122,176 @@ rte_vdpa_get_device_num(void) { return vdpa_device_num; } + +static int +invalid_desc_check(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint64_t desc_iova, uint64_t desc_len, uint8_t perm) +{ + uint64_t desc_addr, desc_chunck_len; + + while (desc_len) { + desc_chunck_len = desc_len; + desc_addr = vhost_iova_to_vva(dev, vq, + desc_iova, + &desc_chunck_len, + perm); + + if (!desc_addr) + return -1; + + desc_len -= desc_chunck_len; + desc_iova += desc_chunck_len; + } + + return 0; +} + +int +rte_vdpa_relay_avail_ring(int vid, int qid, struct vring *m_vring) +{ + struct virtio_net *dev = get_device(vid); + uint16_t idx, idx_m, desc_id; + struct vring_desc desc; + struct vhost_virtqueue *vq; + struct vring_desc *desc_ring; + struct vring_desc *idesc = NULL; + uint64_t dlen; + int ret; + + if (!dev) + return -1; + + vq = dev->virtqueue[qid]; + idx = vq->avail->idx; + idx_m = m_vring->avail->idx; + ret = idx - idx_m; + + while (idx_m != idx) { + /* avail entry copy */ + desc_id = vq->avail->ring[idx_m % vq->size]; + m_vring->avail->ring[idx_m % vq->size] = desc_id; + desc_ring = vq->desc; + + if (vq->desc[desc_id].flags & VRING_DESC_F_INDIRECT) { + dlen = vq->desc[desc_id].len; + desc_ring = (struct vring_desc *)(uintptr_t) + vhost_iova_to_vva(dev, vq, vq->desc[desc_id].addr, + &dlen, + VHOST_ACCESS_RO); + if (unlikely(!desc_ring)) + return -1; + + if (unlikely(dlen < vq->desc[idx].len)) { + idesc = alloc_copy_ind_table(dev, vq, + vq->desc[idx].addr, vq->desc[idx].len); + i
[dpdk-dev] [PATCH 1/9] vhost: provide helper for host notifier ctrl
VDPA driver can decide if it needs to enable/disable the EPT mapping, exposing a API can allow flexibility. A later patch will base on this. Signed-off-by: Xiao Wang --- drivers/net/ifc/ifcvf_vdpa.c | 3 +++ lib/librte_vhost/rte_vdpa.h| 18 ++ lib/librte_vhost/rte_vhost_version.map | 1 + lib/librte_vhost/vhost.c | 3 +-- lib/librte_vhost/vhost_user.c | 7 +-- 5 files changed, 24 insertions(+), 8 deletions(-) diff --git a/drivers/net/ifc/ifcvf_vdpa.c b/drivers/net/ifc/ifcvf_vdpa.c index 97a57f182..e844109f3 100644 --- a/drivers/net/ifc/ifcvf_vdpa.c +++ b/drivers/net/ifc/ifcvf_vdpa.c @@ -556,6 +556,9 @@ ifcvf_dev_config(int vid) rte_atomic32_set(&internal->dev_attached, 1); update_datapath(internal); + if (rte_vhost_host_notifier_ctrl(vid, true) != 0) + DRV_LOG(NOTICE, "vDPA (%d): software relay is used.", did); + return 0; } diff --git a/lib/librte_vhost/rte_vdpa.h b/lib/librte_vhost/rte_vdpa.h index a418da47c..89c5bb6b3 100644 --- a/lib/librte_vhost/rte_vdpa.h +++ b/lib/librte_vhost/rte_vdpa.h @@ -11,6 +11,8 @@ * Device specific vhost lib */ +#include + #include #include "rte_vhost.h" @@ -155,4 +157,20 @@ rte_vdpa_get_device(int did); */ int __rte_experimental rte_vdpa_get_device_num(void); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Enable/Disable EPT mapping for a vdpa port. + * + * @param vid + * vhost device id + * @enable + * true for EPT map, false for EPT unmap + * @return + * 0 on success, -1 on failure + */ +int __rte_experimental +rte_vhost_host_notifier_ctrl(int vid, bool enable); #endif /* _RTE_VDPA_H_ */ diff --git a/lib/librte_vhost/rte_vhost_version.map b/lib/librte_vhost/rte_vhost_version.map index ae39b6e21..22302e972 100644 --- a/lib/librte_vhost/rte_vhost_version.map +++ b/lib/librte_vhost/rte_vhost_version.map @@ -83,4 +83,5 @@ EXPERIMENTAL { rte_vhost_crypto_finalize_requests; rte_vhost_crypto_set_zero_copy; rte_vhost_va_from_guest_pa; + rte_vhost_host_notifier_ctrl; }; diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c index 70ac6bc9c..e7a60e0b4 100644 --- a/lib/librte_vhost/vhost.c +++ b/lib/librte_vhost/vhost.c @@ -408,8 +408,7 @@ vhost_detach_vdpa_device(int vid) if (dev == NULL) return; - vhost_user_host_notifier_ctrl(vid, false); - + vhost_destroy_device_notify(dev); dev->vdpa_dev_id = -1; } diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c index 3ea64eba6..5e0da0589 100644 --- a/lib/librte_vhost/vhost_user.c +++ b/lib/librte_vhost/vhost_user.c @@ -2045,11 +2045,6 @@ vhost_user_msg_handler(int vid, int fd) if (vdpa_dev->ops->dev_conf) vdpa_dev->ops->dev_conf(dev->vid); dev->flags |= VIRTIO_DEV_VDPA_CONFIGURED; - if (vhost_user_host_notifier_ctrl(dev->vid, true) != 0) { - RTE_LOG(INFO, VHOST_CONFIG, - "(%d) software relay is used for vDPA, performance may be low.\n", - dev->vid); - } } return 0; @@ -2144,7 +2139,7 @@ static int vhost_user_slave_set_vring_host_notifier(struct virtio_net *dev, return process_slave_message_reply(dev, &msg); } -int vhost_user_host_notifier_ctrl(int vid, bool enable) +int rte_vhost_host_notifier_ctrl(int vid, bool enable) { struct virtio_net *dev; struct rte_vdpa_device *vdpa_dev; -- 2.15.1
[dpdk-dev] [PATCH 3/9] net/ifc: dump debug message for error
Driver probe may fail for different causes, debug message is helpful for debugging issue. Signed-off-by: Xiao Wang --- drivers/net/ifc/ifcvf_vdpa.c | 19 +-- 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/net/ifc/ifcvf_vdpa.c b/drivers/net/ifc/ifcvf_vdpa.c index e844109f3..aacd5f9bf 100644 --- a/drivers/net/ifc/ifcvf_vdpa.c +++ b/drivers/net/ifc/ifcvf_vdpa.c @@ -22,7 +22,7 @@ #define DRV_LOG(level, fmt, args...) \ rte_log(RTE_LOG_ ## level, ifcvf_vdpa_logtype, \ - "%s(): " fmt "\n", __func__, ##args) + "IFCVF %s(): " fmt "\n", __func__, ##args) #ifndef PAGE_SIZE #define PAGE_SIZE 4096 @@ -756,11 +756,16 @@ ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, internal->pdev = pci_dev; rte_spinlock_init(&internal->lock); - if (ifcvf_vfio_setup(internal) < 0) - return -1; - if (ifcvf_init_hw(&internal->hw, internal->pdev) < 0) - return -1; + if (ifcvf_vfio_setup(internal) < 0) { + DRV_LOG(ERR, "failed to setup device %s", pci_dev->name); + goto error; + } + + if (ifcvf_init_hw(&internal->hw, internal->pdev) < 0) { + DRV_LOG(ERR, "failed to init device %s", pci_dev->name); + goto error; + } internal->max_queues = IFCVF_MAX_QUEUES; features = ifcvf_get_features(&internal->hw); @@ -782,8 +787,10 @@ ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, internal->did = rte_vdpa_register_device(&internal->dev_addr, &ifcvf_ops); - if (internal->did < 0) + if (internal->did < 0) { + DRV_LOG(ERR, "failed to register device %s", pci_dev->name); goto error; + } rte_atomic32_set(&internal->started, 1); update_datapath(internal); -- 2.15.1
[dpdk-dev] [PATCH 7/9] net/ifc: use lib API for used ring logging
Vhost lib has already provided a helper for used ring logging, driver could use it to reduce code. Signed-off-by: Xiao Wang --- drivers/net/ifc/ifcvf_vdpa.c | 27 --- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/drivers/net/ifc/ifcvf_vdpa.c b/drivers/net/ifc/ifcvf_vdpa.c index e9cc8d7bc..6c64ac4f7 100644 --- a/drivers/net/ifc/ifcvf_vdpa.c +++ b/drivers/net/ifc/ifcvf_vdpa.c @@ -31,6 +31,9 @@ #define PAGE_SIZE 4096 #endif +#define IFCVF_USED_RING_LEN(size) \ + ((size) * sizeof(struct vring_used_elem) + sizeof(uint16_t) * 3) + #define IFCVF_VDPA_MODE"vdpa" #define IFCVF_SW_FALLBACK_LM "swlm" @@ -288,21 +291,6 @@ vdpa_ifcvf_start(struct ifcvf_internal *internal) return ifcvf_start_hw(&internal->hw); } -static void -ifcvf_used_ring_log(struct ifcvf_hw *hw, uint32_t queue, uint8_t *log_buf) -{ - uint32_t i, size; - uint64_t pfn; - - pfn = hw->vring[queue].used / PAGE_SIZE; - size = hw->vring[queue].size * sizeof(struct vring_used_elem) + - sizeof(uint16_t) * 3; - - for (i = 0; i <= size / PAGE_SIZE; i++) - __sync_fetch_and_or_8(&log_buf[(pfn + i) / 8], - 1 << ((pfn + i) % 8)); -} - static void vdpa_ifcvf_stop(struct ifcvf_internal *internal) { @@ -311,7 +299,7 @@ vdpa_ifcvf_stop(struct ifcvf_internal *internal) int vid; uint64_t features; uint64_t log_base, log_size; - uint8_t *log_buf; + uint64_t len; vid = internal->vid; ifcvf_stop_hw(hw); @@ -330,9 +318,10 @@ vdpa_ifcvf_stop(struct ifcvf_internal *internal) * IFCVF marks dirty memory pages for only packet buffer, * SW helps to mark the used ring as dirty after device stops. */ - log_buf = (uint8_t *)(uintptr_t)log_base; - for (i = 0; i < hw->nr_vring; i++) - ifcvf_used_ring_log(hw, i, log_buf); + for (i = 0; i < hw->nr_vring; i++) { + len = IFCVF_USED_RING_LEN(hw->vring[i].size); + rte_vhost_log_used_vring(vid, i, 0, len); + } } } -- 2.15.1
[dpdk-dev] [PATCH 4/9] net/ifc: store only registered device instance
If driver fails to register ifc VF device into vhost lib, then this device should not be stored. Fixes: a3f8150eac6d ("net/ifcvf: add ifcvf vDPA driver") cc: sta...@dpdk.org Signed-off-by: Xiao Wang --- drivers/net/ifc/ifcvf_vdpa.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ifc/ifcvf_vdpa.c b/drivers/net/ifc/ifcvf_vdpa.c index aacd5f9bf..6fcd50b73 100644 --- a/drivers/net/ifc/ifcvf_vdpa.c +++ b/drivers/net/ifc/ifcvf_vdpa.c @@ -781,10 +781,6 @@ ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, internal->dev_addr.type = PCI_ADDR; list->internal = internal; - pthread_mutex_lock(&internal_list_lock); - TAILQ_INSERT_TAIL(&internal_list, list, next); - pthread_mutex_unlock(&internal_list_lock); - internal->did = rte_vdpa_register_device(&internal->dev_addr, &ifcvf_ops); if (internal->did < 0) { @@ -792,6 +788,10 @@ ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, goto error; } + pthread_mutex_lock(&internal_list_lock); + TAILQ_INSERT_TAIL(&internal_list, list, next); + pthread_mutex_unlock(&internal_list_lock); + rte_atomic32_set(&internal->started, 1); update_datapath(internal); -- 2.15.1
[dpdk-dev] [PATCH 5/9] net/ifc: detect if VDPA mode is specified
If user wants the VF to be used in VDPA (vhost data path acceleration) mode, then the user can add a "vdpa=1" parameter for the device. So if driver doesn't not find this option, it should quit and let the bus continue the probe. Signed-off-by: Xiao Wang --- drivers/net/ifc/ifcvf_vdpa.c | 47 1 file changed, 47 insertions(+) diff --git a/drivers/net/ifc/ifcvf_vdpa.c b/drivers/net/ifc/ifcvf_vdpa.c index 6fcd50b73..c0e50354a 100644 --- a/drivers/net/ifc/ifcvf_vdpa.c +++ b/drivers/net/ifc/ifcvf_vdpa.c @@ -17,6 +17,8 @@ #include #include #include +#include +#include #include "base/ifcvf.h" @@ -28,6 +30,13 @@ #define PAGE_SIZE 4096 #endif +#define IFCVF_VDPA_MODE"vdpa" + +static const char * const ifcvf_valid_arguments[] = { + IFCVF_VDPA_MODE, + NULL +}; + static int ifcvf_vdpa_logtype; struct ifcvf_internal { @@ -735,6 +744,21 @@ static struct rte_vdpa_dev_ops ifcvf_ops = { .get_notify_area = ifcvf_get_notify_area, }; +static inline int +open_int(const char *key __rte_unused, const char *value, void *extra_args) +{ + uint16_t *n = extra_args; + + if (value == NULL || extra_args == NULL) + return -EINVAL; + + *n = (uint16_t)strtoul(value, NULL, 0); + if (*n == USHRT_MAX && errno == ERANGE) + return -1; + + return 0; +} + static int ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, struct rte_pci_device *pci_dev) @@ -742,10 +766,31 @@ ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, uint64_t features; struct ifcvf_internal *internal = NULL; struct internal_list *list = NULL; + int vdpa_mode = 0; + struct rte_kvargs *kvlist = NULL; + int ret = 0; if (rte_eal_process_type() != RTE_PROC_PRIMARY) return 0; + kvlist = rte_kvargs_parse(pci_dev->device.devargs->args, + ifcvf_valid_arguments); + if (kvlist == NULL) + return 1; + + /* probe only when vdpa mode is specified */ + if (rte_kvargs_count(kvlist, IFCVF_VDPA_MODE) == 0) { + rte_kvargs_free(kvlist); + return 1; + } + + ret = rte_kvargs_process(kvlist, IFCVF_VDPA_MODE, &open_int, + &vdpa_mode); + if (ret < 0 || vdpa_mode == 0) { + rte_kvargs_free(kvlist); + return 1; + } + list = rte_zmalloc("ifcvf", sizeof(*list), 0); if (list == NULL) goto error; @@ -795,9 +840,11 @@ ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, rte_atomic32_set(&internal->started, 1); update_datapath(internal); + rte_kvargs_free(kvlist); return 0; error: + rte_kvargs_free(kvlist); rte_free(list); rte_free(internal); return -1; -- 2.15.1
[dpdk-dev] [PATCH 6/9] net/ifc: add devarg for LM mode
This patch series enables a new method for live migration, i.e. software assisted live migration. This patch provides a device argument for user to choose the methold. When "swlm=1", driver/device will do live migration with a relay thread dealing with dirty page logging. Without this parameter, device will do dirty page logging and there's no relay thread consuming CPU resource. Signed-off-by: Xiao Wang --- drivers/net/ifc/ifcvf_vdpa.c | 15 +++ 1 file changed, 15 insertions(+) diff --git a/drivers/net/ifc/ifcvf_vdpa.c b/drivers/net/ifc/ifcvf_vdpa.c index c0e50354a..e9cc8d7bc 100644 --- a/drivers/net/ifc/ifcvf_vdpa.c +++ b/drivers/net/ifc/ifcvf_vdpa.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -31,9 +32,11 @@ #endif #define IFCVF_VDPA_MODE"vdpa" +#define IFCVF_SW_FALLBACK_LM "swlm" static const char * const ifcvf_valid_arguments[] = { IFCVF_VDPA_MODE, + IFCVF_SW_FALLBACK_LM, NULL }; @@ -56,6 +59,7 @@ struct ifcvf_internal { rte_atomic32_t dev_attached; rte_atomic32_t running; rte_spinlock_t lock; + bool sw_lm; }; struct internal_list { @@ -767,6 +771,7 @@ ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, struct ifcvf_internal *internal = NULL; struct internal_list *list = NULL; int vdpa_mode = 0; + int sw_fallback_lm = 0; struct rte_kvargs *kvlist = NULL; int ret = 0; @@ -826,6 +831,16 @@ ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, internal->dev_addr.type = PCI_ADDR; list->internal = internal; + if (rte_kvargs_count(kvlist, IFCVF_SW_FALLBACK_LM)) { + ret = rte_kvargs_process(kvlist, IFCVF_SW_FALLBACK_LM, + &open_int, &sw_fallback_lm); + if (ret < 0) + goto error; + internal->sw_lm = sw_fallback_lm ? true : false; + } else { + internal->sw_lm = false; + } + internal->did = rte_vdpa_register_device(&internal->dev_addr, &ifcvf_ops); if (internal->did < 0) { -- 2.15.1
[dpdk-dev] [PATCH 8/9] net/ifc: support SW assisted VDPA live migration
In SW assisted live migration mode, driver will stop the device and setup a mediate virtio ring to relay the communication between the virtio driver and the VDPA device. This data path intervention will allow SW to help on guest dirty page logging for live migration. This SW fallback is event driven relay thread, so when the network throughput is low, this SW fallback will take little CPU resource, but when the throughput goes up, the relay thread's CPU usage will goes up accordinly. User needs to take all the factors including CPU usage, guest perf degradation, etc. into consideration when selecting the live migration support mode. Signed-off-by: Xiao Wang --- drivers/net/ifc/base/ifcvf.h | 1 + drivers/net/ifc/ifcvf_vdpa.c | 346 ++- 2 files changed, 344 insertions(+), 3 deletions(-) diff --git a/drivers/net/ifc/base/ifcvf.h b/drivers/net/ifc/base/ifcvf.h index f026c70ab..8eb70ae9d 100644 --- a/drivers/net/ifc/base/ifcvf.h +++ b/drivers/net/ifc/base/ifcvf.h @@ -50,6 +50,7 @@ #define IFCVF_LM_ENABLE_VF 0x1 #define IFCVF_LM_ENABLE_PF 0x3 #define IFCVF_LOG_BASE 0x1000 +#define IFCVF_MEDIATE_VRING0x2000 #define IFCVF_32_BIT_MASK 0x diff --git a/drivers/net/ifc/ifcvf_vdpa.c b/drivers/net/ifc/ifcvf_vdpa.c index 6c64ac4f7..875a0009d 100644 --- a/drivers/net/ifc/ifcvf_vdpa.c +++ b/drivers/net/ifc/ifcvf_vdpa.c @@ -63,6 +63,9 @@ struct ifcvf_internal { rte_atomic32_t running; rte_spinlock_t lock; bool sw_lm; + bool sw_fallback_running; + /* mediated vring for sw fallback */ + struct vring m_vring[IFCVF_MAX_QUEUES * 2]; }; struct internal_list { @@ -308,6 +311,9 @@ vdpa_ifcvf_stop(struct ifcvf_internal *internal) rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx, hw->vring[i].last_used_idx); + if (internal->sw_lm) + return; + rte_vhost_get_negotiated_features(vid, &features); if (RTE_VHOST_NEED_LOG(features)) { ifcvf_disable_logging(hw); @@ -539,6 +545,318 @@ update_datapath(struct ifcvf_internal *internal) return ret; } +static int +m_ifcvf_start(struct ifcvf_internal *internal) +{ + struct ifcvf_hw *hw = &internal->hw; + uint32_t i, nr_vring; + int vid, ret; + struct rte_vhost_vring vq; + void *vring_buf; + uint64_t m_vring_iova = IFCVF_MEDIATE_VRING; + uint64_t size; + uint64_t gpa; + + vid = internal->vid; + nr_vring = rte_vhost_get_vring_num(vid); + rte_vhost_get_negotiated_features(vid, &hw->req_features); + + for (i = 0; i < nr_vring; i++) { + rte_vhost_get_vhost_vring(vid, i, &vq); + + size = RTE_ALIGN_CEIL(vring_size(vq.size, PAGE_SIZE), + PAGE_SIZE); + vring_buf = rte_zmalloc("ifcvf", size, PAGE_SIZE); + vring_init(&internal->m_vring[i], vq.size, vring_buf, + PAGE_SIZE); + + ret = rte_vfio_container_dma_map(internal->vfio_container_fd, + (uint64_t)(uintptr_t)vring_buf, m_vring_iova, size); + if (ret < 0) { + DRV_LOG(ERR, "mediate vring DMA map failed."); + goto error; + } + + gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.desc); + if (gpa == 0) { + DRV_LOG(ERR, "Fail to get GPA for descriptor ring."); + return -1; + } + hw->vring[i].desc = gpa; + + hw->vring[i].avail = m_vring_iova + + (char *)internal->m_vring[i].avail - + (char *)internal->m_vring[i].desc; + + hw->vring[i].used = m_vring_iova + + (char *)internal->m_vring[i].used - + (char *)internal->m_vring[i].desc; + + hw->vring[i].size = vq.size; + + rte_vhost_get_vring_base(vid, i, &hw->vring[i].last_avail_idx, + &hw->vring[i].last_used_idx); + + m_vring_iova += size; + } + hw->nr_vring = nr_vring; + + return ifcvf_start_hw(&internal->hw); + +error: + for (i = 0; i < nr_vring; i++) + if (internal->m_vring[i].desc) + rte_free(internal->m_vring[i].desc); + + return -1; +} + +static int +m_ifcvf_stop(struct ifcvf_internal *internal) +{ + int vid; + uint32_t i; + struct rte_vhost_vring vq; + struct ifcvf_hw *hw = &internal->hw; + uint64_t m_vring_iova = IFCVF_MEDIATE_VRING; + uint64_t size, len; + + vid = internal->vid; + ifcvf_stop_hw(hw); + + for (i = 0; i < hw->nr_vring; i++) { + rte_vhost_get_vhost_vring(vid, i, &vq); + l
[dpdk-dev] [PATCH 9/9] doc: update ifc NIC document
Signed-off-by: Xiao Wang --- doc/guides/nics/ifc.rst | 7 +++ 1 file changed, 7 insertions(+) diff --git a/doc/guides/nics/ifc.rst b/doc/guides/nics/ifc.rst index 48f9adf1d..a16f2982f 100644 --- a/doc/guides/nics/ifc.rst +++ b/doc/guides/nics/ifc.rst @@ -39,6 +39,12 @@ the driver probe a new container is created for this device, with this container vDPA driver can program DMA remapping table with the VM's memory region information. +The device argument "swlm=1" will configure the driver into SW assisted live +migration mode. In this mode, the driver will set up a SW relay thread when LM +happens, this thread will help device to log dirty pages. Thus this mode does +not require HW to implement a dirty page logging function block, but will +consume some percentage of CPU resource depending on the network throughput. + Key IFCVF vDPA driver ops ~ @@ -70,6 +76,7 @@ Features Features of the IFCVF driver are: - Compatibility with virtio 0.95 and 1.0. +- SW assisted vDPA for live migration. Prerequisites -- 2.15.1
Re: [dpdk-dev] [PATCH] vhost: batch used descriptors chains write-back with packed ring
On Wed, Nov 28, 2018 at 10:47:00AM +0100, Maxime Coquelin wrote: Instead of writing back descriptors chains in order, let's write the first chain flags last in order to improve batching. With Kernel's pktgen benchmark, ~3% performance gain is measured. Signed-off-by: Maxime Coquelin --- lib/librte_vhost/virtio_net.c | 37 ++- 1 file changed, 23 insertions(+), 14 deletions(-) Tested-by: Jens Freimann Reviewed-by: Jens Freimann
Re: [dpdk-dev] [PATCH 3/3] lib/librte_meter: update abi to include new rfc4115 function
28/11/2018 10:27, Eelco Chaudron: > On 28 Nov 2018, at 9:38, David Marchand wrote: > > On Tue, Nov 27, 2018 at 4:22 PM Eelco Chaudron > > wrote: > >> --- a/lib/librte_meter/Makefile > >> +++ b/lib/librte_meter/Makefile > >> -LIBABIVER := 2 > >> +LIBABIVER := 3 > > > > As far as I understood the policy around the EXPERIMENTAL section, you > > don't need to bump the library version. > > Thought I would add the new function as none experimental, i.e. next > version, but checkpatch did not allow me to do this. > > Tried to find info on what the right process was, as these functions are > just another meter implementation using similar existing APIs. If anyone > has any background on this please point me to it. It is documented here: http://doc.dpdk.org/guides/contributing/versioning.html The case for "similar API" is not handled specifically so far. So you need to introduce it as experimental. > I changed the library version as an existing data structure changed > (which in theory should not change the location of the data), but the > ABI check popped warnings so I decided to increase the version. It deserves to analyze why the ABI check raises a warning. If it really needs to bump the ABI version, you should justify it in the commit message, and explain what changed in the ABI section of the release notes, plus update the version in the release notes.
Re: [dpdk-dev] [PATCH] mk: don't install meson.build in usertools
On Tue, 2018-11-27 at 20:35 +0100, Timothy Redaelli wrote: > In commit 7dd34c71de2a ("usertools: install with meson") meson.build > was > added in usertools directory and so it's copied to > $(datadir)/usertools > with "make install". > > This patch avoids to copy meson.build when installing usertools with > "make install". > > Signed-off-by: Timothy Redaelli > --- > mk/rte.sdkinstall.mk | 5 +++-- > 1 file changed, 3 insertions(+), 2 deletions(-) > > diff --git a/mk/rte.sdkinstall.mk b/mk/rte.sdkinstall.mk > index 8296e6dbd..2d34b4e5a 100644 > --- a/mk/rte.sdkinstall.mk > +++ b/mk/rte.sdkinstall.mk > @@ -99,8 +99,9 @@ install-runtime: > --exclude 'app/cmdline*' --exclude app/test \ > --exclude app/testacl --exclude app/testpipeline app > | \ > tar -xf - -C $(DESTDIR)$(bindir) $(TAR_X_FLAGS) > - $(Q)$(call rte_mkdir, $(DESTDIR)$(datadir)) > - $(Q)cp $(CP_FLAGS) $(RTE_SDK)/usertools $(DESTDIR)$(datadir) > + $(Q)$(call rte_mkdir, $(DESTDIR)$(datadir)/usertools) > + $(Q)tar -cf - -C $(RTE_SDK) --exclude meson.build > usertools | \ > + tar -xf - -C $(DESTDIR)$(datadir)/usertools > $(TAR_X_FLAGS) > $(Q)$(call rte_mkdir, $(DESTDIR)$(sbindir)) > $(Q)$(call > rte_symlink,$(DESTDIR)$(datadir)/usertools/dpdk-devbind.py, \ > $(DESTDIR)$(sbindir)/dpdk- > devbind) Acked-by: Luca Boccassi Should probably add: Fixes: 7dd34c71de2a ("usertools: install with meson") Cc: sta...@dpdk.org -- Kind regards, Luca Boccassi
Re: [dpdk-dev] [PATCH] ip_frag: fix ipv6 when MTU sizes not aligned to 8 bytes
On Mon, 2018-11-26 at 23:56 -0500, Chas Williams wrote: > From: Chas Williams > > The same issue was fixed on for the ipv4 version of this routine in > commit 8d4d3a4f7337 ("ip_frag: handle MTU sizes not aligned to 8 > bytes"). > Briefly, the size of an ipv6 header is always 40 bytes. With an MTU > of > 1500, this will never produce a multiple of 8 bytes for the frag_size > and this routine can never succeed. Since RTE_ASSERTS are disabled by > default, this failure is tpyically ignored. > > To fix this, round down to the nearest 8 bytes and use this when > producing the fragments. > > Fixes: 0aa31d7a5929 ("ip_frag: add IPv6 fragmentation support") > > Signed-off-by: Chas Williams > --- > lib/librte_ip_frag/rte_ip_frag.h| 1 + > lib/librte_ip_frag/rte_ipv6_fragmentation.c | 18 +++--- > 2 files changed, 12 insertions(+), 7 deletions(-) Acked-by: Luca Boccassi -- Kind regards, Luca Boccassi
[dpdk-dev] [PATCH 1/2] devtools: report the incorrect section when complaining
It does not hurt reporting the incriminated section. Before: ERROR: symbol rte_meter_trtcm_rfc4115_color_aware_check is added in a section other than the EXPERIMENTAL section of the version map After: ERROR: symbol rte_meter_trtcm_rfc4115_color_aware_check is added in +EXPERIMENTAL section other than the EXPERIMENTAL section of the version map Signed-off-by: David Marchand --- Used http://patchwork.dpdk.org/patch/48354/ to test. --- devtools/check-symbol-change.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devtools/check-symbol-change.sh b/devtools/check-symbol-change.sh index 1d21e91..66741be 100755 --- a/devtools/check-symbol-change.sh +++ b/devtools/check-symbol-change.sh @@ -115,7 +115,7 @@ check_for_rule_violations() if [ $? -ne 0 ] then echo -n "ERROR: symbol $symname " - echo -n "is added in a section " + echo -n "is added in $secname section " echo -n "other than the EXPERIMENTAL " echo "section of the version map" ret=1 -- 1.8.3.1
[dpdk-dev] [PATCH 2/2] devtools: drop '+' from the section name
The incriminated commit did relax the condition to catch all sections but dropped the + removal which can trigger false detection of the special EXPERIMENTAL section when adding symbols and the section in the same patch. Fixes: 7281cf520f89 ("devtools: relax rule for identifying symbol section") Signed-off-by: David Marchand --- Used http://patchwork.dpdk.org/patch/48354/ to test. --- devtools/check-symbol-change.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/devtools/check-symbol-change.sh b/devtools/check-symbol-change.sh index 66741be..8b36ccd 100755 --- a/devtools/check-symbol-change.sh +++ b/devtools/check-symbol-change.sh @@ -31,6 +31,7 @@ build_map_changes() # Triggering this rule sets in_sec to 1, which actives the # symbol rule below /^.*{/ { + gsub("+", ""); if (in_map == 1) { sec=$(NF-1); in_sec=1; } -- 1.8.3.1
[dpdk-dev] set_tsc_freq() wrong return value - bug?
Hi all, during some tests with dpdk 18.02.2 on RedHat 7 kernel 3.10.0-862.el7.x86_64 using Intel X710 and 2 x Intel(R) Xeon(R) Gold 6130 CPU @ 2.10GHz I found the following. I put some log in the function set_tsc_freq as you can see below void set_tsc_freq(void) { uint64_t freq; freq = get_tsc_freq_arch(); RTE_LOG(WARNING, EAL, "Function get_tsc_freq_arch() returns %lu\n", freq); if (!freq) { freq = get_tsc_freq(); RTE_LOG(WARNING, EAL, "Function get_tsc_freq() returns %lu\n", freq); } if (!freq) { freq = estimate_tsc_freq(); RTE_LOG(WARNING, EAL, "Function estimate_tsc_freq() returns %lu\n", freq); } RTE_LOG(DEBUG, EAL, "TSC frequency is ~%" PRIu64 " KHz\n", freq / 1000); eal_tsc_resolution_hz = freq; } the output of this function is: "EAL: Function get_tsc_freq_arch() returns 21" but with this value if I use the rte_get_timer_cycles() in my application and try to convert the return value into seconds I saw that the more time passes the more date is wrong. Moreover, if I comment some rows in the set_tsc_freq function this way void set_tsc_freq(void) { uint64_t freq; //freq = get_tsc_freq_arch(); //RTE_LOG(WARNING, EAL, "Function get_tsc_freq_arch() returns %lu\n", freq); //if (!freq) //{ freq = get_tsc_freq(); RTE_LOG(WARNING, EAL, "Function get_tsc_freq() returns %lu\n", freq); //} if (!freq) { freq = estimate_tsc_freq(); RTE_LOG(WARNING, EAL, "Function estimate_tsc_freq() returns %lu\n", freq); } RTE_LOG(DEBUG, EAL, "TSC frequency is ~%" PRIu64 " KHz\n", freq / 1000); eal_tsc_resolution_hz = freq; } the result is "EAL: Function get_tsc_freq() returns 2094994596" with this value the conversion into seconds from rte_get_timer_cycles() is always correct. Is this a bug? Or am I doing something wrong? Regards, Matteo
Re: [dpdk-dev] [PATCH] examples/bond: fix initialization error
On 11/27/2018 6:57 PM, Chas Williams wrote: > > > On 11/13/2018 11:46 AM, Radu Nicolau wrote: >> Queue setup will fail if called before adding slaves. >> >> Fixes: 7a0665940fa8 ("net/bonding: inherit descriptor limits from slaves") >> Cc: sta...@dpdk.org >> >> Signed-off-by: Radu Nicolau > > Acked-by: Chas Williams Applied to dpdk-next-net/master, thanks.
[dpdk-dev] [PATCH v2 1/3] compress/isal: enable checksum support in driver
This patch adds checksum support in the ISA-L PMD for both compression and decompression. CRC32 is supported as well as Adler32. Signed-off-by: Lee Daly --- drivers/compress/isal/isal_compress_pmd.c | 80 ++- drivers/compress/isal/isal_compress_pmd_ops.c | 4 +- 2 files changed, 69 insertions(+), 15 deletions(-) diff --git a/drivers/compress/isal/isal_compress_pmd.c b/drivers/compress/isal/isal_compress_pmd.c index 9f1e968..b087ed8 100644 --- a/drivers/compress/isal/isal_compress_pmd.c +++ b/drivers/compress/isal/isal_compress_pmd.c @@ -16,6 +16,8 @@ #define RTE_COMP_ISAL_LEVEL_ONE 1 #define RTE_COMP_ISAL_LEVEL_TWO 2 #define RTE_COMP_ISAL_LEVEL_THREE 3 /* Optimised for AVX512 & AVX2 only */ +#define CHKSUM_SZ_CRC 8 +#define CHKSUM_SZ_ADLER 4 int isal_logtype_driver; @@ -43,12 +45,6 @@ isal_comp_set_priv_xform_parameters(struct isal_priv_xform *priv_xform, } priv_xform->compress.algo = RTE_COMP_ALGO_DEFLATE; - /* Set private xform checksum - raw deflate by default */ - if (xform->compress.chksum != RTE_COMP_CHECKSUM_NONE) { - ISAL_PMD_LOG(ERR, "Checksum not supported\n"); - return -ENOTSUP; - } - /* Set private xform window size, 32K supported */ if (xform->compress.window_size == RTE_COMP_ISAL_WINDOW_SIZE) priv_xform->compress.window_size = @@ -77,6 +73,27 @@ isal_comp_set_priv_xform_parameters(struct isal_priv_xform *priv_xform, return -ENOTSUP; } + /* Set private xform checksum */ + switch (xform->compress.chksum) { + /* Raw deflate by default */ + case(RTE_COMP_CHECKSUM_NONE): + priv_xform->compress.chksum = IGZIP_DEFLATE; + break; + case(RTE_COMP_CHECKSUM_CRC32): + priv_xform->compress.chksum = IGZIP_GZIP_NO_HDR; + break; + case(RTE_COMP_CHECKSUM_ADLER32): + priv_xform->compress.chksum = IGZIP_ZLIB_NO_HDR; + break; + case(RTE_COMP_CHECKSUM_CRC32_ADLER32): + ISAL_PMD_LOG(ERR, "Combined CRC and ADLER checksum not" + " supported\n"); + return -ENOTSUP; + default: + ISAL_PMD_LOG(ERR, "Checksum type not supported\n"); + return -ENOTSUP; + } + /* Set private xform level. * Checking compliance with compressdev API, -1 <= level => 9 */ @@ -170,9 +187,24 @@ isal_comp_set_priv_xform_parameters(struct isal_priv_xform *priv_xform, } priv_xform->decompress.algo = RTE_COMP_ALGO_DEFLATE; - /* Set private xform checksum - raw deflate by default */ - if (xform->compress.chksum != RTE_COMP_CHECKSUM_NONE) { - ISAL_PMD_LOG(ERR, "Checksum not supported\n"); + /* Set private xform checksum */ + switch (xform->decompress.chksum) { + /* Raw deflate by default */ + case(RTE_COMP_CHECKSUM_NONE): + priv_xform->decompress.chksum = ISAL_DEFLATE; + break; + case(RTE_COMP_CHECKSUM_CRC32): + priv_xform->decompress.chksum = ISAL_GZIP_NO_HDR_VER; + break; + case(RTE_COMP_CHECKSUM_ADLER32): + priv_xform->decompress.chksum = ISAL_ZLIB_NO_HDR_VER; + break; + case(RTE_COMP_CHECKSUM_CRC32_ADLER32): + ISAL_PMD_LOG(ERR, "Combined CRC and ADLER checksum not" + " supported\n"); + return -ENOTSUP; + default: + ISAL_PMD_LOG(ERR, "Checksum type not supported\n"); return -ENOTSUP; } @@ -376,6 +408,9 @@ process_isal_deflate(struct rte_comp_op *op, struct isal_comp_qp *qp, qp->stream->level_buf = temp_level_buf; + /* Set Checksum flag */ + qp->stream->gzip_flag = priv_xform->compress.chksum; + /* Stateless operation, input will be consumed in one go */ qp->stream->flush = NO_FLUSH; @@ -459,8 +494,18 @@ process_isal_deflate(struct rte_comp_op *op, struct isal_comp_qp *qp, return ret; } } + op->consumed = qp->stream->total_in; - op->produced = qp->stream->total_out; + if (qp->stream->gzip_flag == IGZIP_DEFLATE) { + op->produced = qp->stream->total_out; + } else if (qp->stream->gzip_flag == IGZIP_ZLIB_NO_HDR) { + op->produced = qp->stream->total_out - CHKSUM_SZ
[dpdk-dev] [PATCH v2 2/3] test/compress: add checksum tests
This patch adds a test which examines what type of checksum the PMD supports, adler, crc32 or alder32_crc32 and tests that feature if the PMD supports it. Signed-off-by: Lee Daly --- test/test/test_compressdev.c | 206 ++- 1 file changed, 203 insertions(+), 3 deletions(-) diff --git a/test/test/test_compressdev.c b/test/test/test_compressdev.c index 4b98001..2ca4c91 100644 --- a/test/test/test_compressdev.c +++ b/test/test/test_compressdev.c @@ -37,6 +37,12 @@ #define CACHE_SIZE 0 #define OFFSET 800 +#define ZLIB_CRC_CHECKSUM_WINDOW_BITS 31 +#define ZLIB_HEADER_SIZE 2 +#define ZLIB_TRAILER_SIZE 4 +#define GZIP_HEADER_SIZE 10 +#define GZIP_TRAILER_SIZE 8 + const char * huffman_type_strings[] = { [RTE_COMP_HUFFMAN_DEFAULT] = "PMD default", @@ -319,6 +325,10 @@ compress_zlib(struct rte_comp_op *op, * When doing raw DEFLATE, this number will be negative. */ window_bits = -(xform->compress.window_size); + if (xform->compress.chksum == RTE_COMP_CHECKSUM_ADLER32) + window_bits *= -1; + else if (xform->compress.chksum == RTE_COMP_CHECKSUM_CRC32) + window_bits = ZLIB_CRC_CHECKSUM_WINDOW_BITS; comp_level = xform->compress.level; @@ -422,8 +432,19 @@ compress_zlib(struct rte_comp_op *op, } op->consumed = stream.total_in; - op->produced = stream.total_out; + if (xform->compress.chksum == RTE_COMP_CHECKSUM_ADLER32) { + rte_pktmbuf_adj(op->m_dst, ZLIB_HEADER_SIZE); + op->produced = stream.total_out - + (ZLIB_HEADER_SIZE + ZLIB_TRAILER_SIZE); + } else if (xform->compress.chksum == RTE_COMP_CHECKSUM_CRC32) { + rte_pktmbuf_adj(op->m_dst, GZIP_HEADER_SIZE); + op->produced = stream.total_out - + (GZIP_HEADER_SIZE + GZIP_TRAILER_SIZE); + } else + op->produced = stream.total_out; + op->status = RTE_COMP_OP_STATUS_SUCCESS; + op->output_chksum = stream.adler; deflateReset(&stream); @@ -457,7 +478,6 @@ decompress_zlib(struct rte_comp_op *op, * When doing raw DEFLATE, this number will be negative. */ window_bits = -(xform->decompress.window_size); - ret = inflateInit2(&stream, window_bits); if (ret != Z_OK) { @@ -706,6 +726,7 @@ test_deflate_comp_decomp(const char * const test_bufs[], const struct rte_compressdev_capabilities *capa = rte_compressdev_capability_get(0, RTE_COMP_ALGO_DEFLATE); char *contig_buf = NULL; + uint64_t compress_checksum[num_bufs]; /* Initialize all arrays to NULL */ memset(uncomp_bufs, 0, sizeof(struct rte_mbuf *) * num_bufs); @@ -896,6 +917,7 @@ test_deflate_comp_decomp(const char * const test_bufs[], &ops_processed[num_total_deqd], num_bufs); num_total_deqd += num_deqd; deqd_retries++; + } while (num_total_deqd < num_enqd); deqd_retries = 0; @@ -929,6 +951,8 @@ test_deflate_comp_decomp(const char * const test_bufs[], ops_processed[i]->consumed == 0 ? 0 : (float)ops_processed[i]->produced / ops_processed[i]->consumed * 100); + if (compress_xform->chksum != RTE_COMP_CHECKSUM_NONE) + compress_checksum[i] = ops_processed[i]->output_chksum; ops[i] = NULL; } @@ -1166,11 +1190,23 @@ test_deflate_comp_decomp(const char * const test_bufs[], buf2 = rte_pktmbuf_read(ops_processed[i]->m_dst, ops_processed[i]->dst.offset, ops_processed[i]->produced, contig_buf); - if (compare_buffers(buf1, strlen(buf1) + 1, buf2, ops_processed[i]->produced) < 0) goto exit; + /* Test checksums */ + if (compress_xforms[0]->compress.chksum != + RTE_COMP_CHECKSUM_NONE) { + if (ops_processed[i]->output_chksum != + compress_checksum[i]) { + RTE_LOG(ERR, USER1, "The checksums differ\n" + "Compression Checksum: %" PRIu64 "\tDecompression " + "Checksum: %" PRIu64 "\n", compress_checksum[i], + ops_processed[i]->output_chksum); + goto exit; + } + } + rte_free(contig_buf); contig_buf = NULL; } @@ -1605,6 +1641,168 @@ test_compressdev_deflate_stateless_offset(void) } return TEST_SUCCESS; + +} + +static int +test_compressdev_deflate_stateless_checksum(void) +{ + struct comp_tes
[dpdk-dev] [PATCH v2 3/3] doc: update ISA-L guide to reflect checksum support
This updates the ISA-L compression driver guide on how to enable and use checksums. This also updates the compression drivers features matrix. Will add to release notes once the 19.02.rst file is added. Signed-off-by: Lee Daly --- doc/guides/compressdevs/features/isal.ini | 2 ++ doc/guides/compressdevs/isal.rst | 30 -- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/doc/guides/compressdevs/features/isal.ini b/doc/guides/compressdevs/features/isal.ini index 919cf70..e705031 100644 --- a/doc/guides/compressdevs/features/isal.ini +++ b/doc/guides/compressdevs/features/isal.ini @@ -12,5 +12,7 @@ OOP SGL In SGL Out = Y OOP SGL In LB Out = Y OOP LB In SGL Out = Y Deflate= Y +Adler32= Y +Crc32 = Y Fixed = Y Dynamic= Y diff --git a/doc/guides/compressdevs/isal.rst b/doc/guides/compressdevs/isal.rst index 3bc3022..af1f41f 100644 --- a/doc/guides/compressdevs/isal.rst +++ b/doc/guides/compressdevs/isal.rst @@ -27,6 +27,33 @@ Window size support: * 32K +Checksum: + +* CRC32 +* ADLER32 + +To enable a checksum in the driver, the compression and/or decompression xform +structure, rte_comp_xform, must be filled with either of the CompressDev +checksum flags supported. :: + + compress_xform->compress.chksum = RTE_COMP_CHECKSUM_CRC32 + + decompress_xform->decompress.chksum = RTE_COMP_CHECKSUM_CRC32 + +:: + + compress_xform->compress.chksum = RTE_COMP_CHECKSUM_ADLER32 + + decompress_xform->decompress.chksum = RTE_COMP_CHECKSUM_ADLER32 + +If you request a checksum for compression or decompression, +the checksum field in the operation structure, ``op->output_chksum``, +will be filled with the checksum. + +.. Note:: + + For the compression case above, your output buffer will need to be large enough to hold the compressed data plus a scratchpad for the checksum at the end, the scratchpad is 8 bytes for CRC32 and 4 bytes for Adler32. + Level guide: The ISA-L levels have been mapped to somewhat correspond to the same ZLIB level, @@ -75,13 +102,12 @@ As a result the level mappings from the API to the PMD are shown below. The above table only shows mapping when API calls for dynamic compression. For fixed compression, regardless of API level, internally ISA-L level 0 is always used. + Limitations --- * Compressdev level 0, no compression, is not supported. -* Checksums will not be supported until future release. - Installation -- 2.7.4
[dpdk-dev] [PATCH] version: 19.02-rc0
Start version numbering for a new release cycle, and introduce a template file for release notes. The release notes comments are updated to mandate a scope label for API and ABI changes. Signed-off-by: Thomas Monjalon --- doc/guides/rel_notes/index.rst | 1 + doc/guides/rel_notes/release_19_02.rst | 209 lib/librte_eal/common/include/rte_version.h | 8 +- meson.build | 2 +- 4 files changed, 215 insertions(+), 5 deletions(-) create mode 100644 doc/guides/rel_notes/release_19_02.rst diff --git a/doc/guides/rel_notes/index.rst b/doc/guides/rel_notes/index.rst index 1243e985c..ccfd38bcf 100644 --- a/doc/guides/rel_notes/index.rst +++ b/doc/guides/rel_notes/index.rst @@ -8,6 +8,7 @@ Release Notes :maxdepth: 1 :numbered: +release_19_02 release_18_11 release_18_08 release_18_05 diff --git a/doc/guides/rel_notes/release_19_02.rst b/doc/guides/rel_notes/release_19_02.rst new file mode 100644 index 0..387e20e38 --- /dev/null +++ b/doc/guides/rel_notes/release_19_02.rst @@ -0,0 +1,209 @@ +.. SPDX-License-Identifier: BSD-3-Clause +Copyright 2018 The DPDK contributors + +DPDK Release 19.02 +== + +.. **Read this first.** + + The text in the sections below explains how to update the release notes. + + Use proper spelling, capitalization and punctuation in all sections. + + Variable and config names should be quoted as fixed width text: + ``LIKE_THIS``. + + Build the docs and view the output file to ensure the changes are correct:: + + make doc-guides-html + + xdg-open build/doc/html/guides/rel_notes/release_19_02.html + + +New Features + + +.. This section should contain new features added in this release. + Sample format: + + * **Add a title in the past tense with a full stop.** + + Add a short 1-2 sentence description in the past tense. + The description should be enough to allow someone scanning + the release notes to understand the new feature. + + If the feature adds a lot of sub-features you can use a bullet list + like this: + + * Added feature foo to do something. + * Enhanced feature bar to do something else. + + Refer to the previous release notes for examples. + + Suggested order in release notes items: + * Core libs (EAL, mempool, ring, mbuf, buses) + * Device abstraction libs and PMDs + - ethdev (lib, PMDs) + - cryptodev (lib, PMDs) + - eventdev (lib, PMDs) + - etc + * Other libs + * Apps, Examples, Tools (if significant) + + This section is a comment. Do not overwrite or remove it. + Also, make sure to start the actual text at the margin. + = + + +Removed Items +- + +.. This section should contain removed items in this release. Sample format: + + * Add a short 1-2 sentence description of the removed item + in the past tense. + + This section is a comment. Do not overwrite or remove it. + Also, make sure to start the actual text at the margin. + = + + +API Changes +--- + +.. This section should contain API changes. Sample format: + + * sample: Add a short 1-2 sentence description of the API change. + Start with a scope label like "ethdev:". + Use fixed width quotes for ``function_names`` or ``struct_names``. + Use the past tense. + + This section is a comment. Do not overwrite or remove it. + Also, make sure to start the actual text at the margin. + = + + +ABI Changes +--- + +.. This section should contain ABI changes. Sample format: + + * sample: Add a short 1-2 sentence description of the ABI change. + Start with a scope label like "ethdev:". + that was announced in the previous releases and made in this release. + Use fixed width quotes for ``function_names`` or ``struct_names``. + Use the past tense. + + This section is a comment. Do not overwrite or remove it. + Also, make sure to start the actual text at the margin. + = + + +Shared Library Versions +--- + +.. Update any library version updated in this release + and prepend with a ``+`` sign, like this: + + libfoo.so.1 + + libupdated.so.2 + libbar.so.1 + + This section is a comment. Do not overwrite or remove it. + = + +The libraries prepended with a plus sign were incremented in this version. + +.. code-block:: diff + + librte_acl.so.2 + librte_bbdev.so.1 + librte_bitratestats.so.2 + librte_bpf.so.1 + librte_bus_dpaa.so.2 + librte_bus_fslmc.so.2 + librte_bus_ifpga.so.2 + librte_bus_pci.so.2 + librte_bus_vdev.so.2 + librte_bus_vmbus.so.2 + librte_cfgfile
Re: [dpdk-dev] [PATCH] examples/bond: wait for slaves to become active
On 11/14/2018 12:19 PM, Radu Nicolau wrote: > Do not start the packet processing threads until all configured > slaves become active. Hi Radu, What happens if packet processing threads started before all slaves active? Exit app, error, crash? So can we say this patch is fixing packet forwarding? (fix in title?) And do we know what break it, why this was not required previously but required now? (Fixes line ?) Thanks, ferruh > > Signed-off-by: Radu Nicolau > --- > examples/bond/main.c | 15 +++ > 1 file changed, 15 insertions(+) > > diff --git a/examples/bond/main.c b/examples/bond/main.c > index b282e68..6623cae 100644 > --- a/examples/bond/main.c > +++ b/examples/bond/main.c > @@ -220,6 +220,7 @@ bond_port_init(struct rte_mempool *mbuf_pool) > struct rte_eth_rxconf rxq_conf; > struct rte_eth_txconf txq_conf; > struct rte_eth_conf local_port_conf = port_conf; > + uint16_t wait_counter = 20; > > retval = rte_eth_bond_create("net_bonding0", BONDING_MODE_ALB, > 0 /*SOCKET_ID_ANY*/); > @@ -274,6 +275,20 @@ bond_port_init(struct rte_mempool *mbuf_pool) > if (retval < 0) > rte_exit(retval, "Start port %d failed (res=%d)", BOND_PORT, > retval); > > + printf("Waiting for slaves to become active..."); > + while (wait_counter) { > + uint16_t act_slaves[16] = {0}; > + if (rte_eth_bond_active_slaves_get(BOND_PORT, act_slaves, 16) == > + slaves_count) { > + printf("\n"); > + break; > + } > + sleep(1); > + printf("..."); > + if (--wait_counter == 0) > + rte_exit(-1, "\nFailed to activate slaves\n"); > + } > + > rte_eth_promiscuous_enable(BOND_PORT); > > struct ether_addr addr; >
Re: [dpdk-dev] [PATCH] version: 19.02-rc0
On 11/28/2018 10:44 AM, Thomas Monjalon wrote: > Start version numbering for a new release cycle, > and introduce a template file for release notes. > > The release notes comments are updated to mandate > a scope label for API and ABI changes. > > Signed-off-by: Thomas Monjalon > --- > doc/guides/rel_notes/index.rst | 1 + > doc/guides/rel_notes/release_19_02.rst | 209 What do you think about storing the release notes template in git repo, this helps creating new ones for next release also makes easier to track/view what has been changed in the release notes template? Reviewed-by: Ferruh Yigit
[dpdk-dev] [PATCH] net/nfp: add multiprocess support
This patch introduces changes for supporting multiprocess support. This is trivial for VFs but comes with some limitations for the PF. Due to restrictions when using NFP CPP interface, just one secondary process is supported by now for the PF. Signed-off-by: Alejandro Lucero --- doc/guides/nics/features/nfp.ini | 1 + doc/guides/nics/features/nfp_vf.ini| 1 + drivers/net/nfp/nfp_net.c | 133 + drivers/net/nfp/nfpcore/nfp_cpp_pcie_ops.c | 103 ++-- 4 files changed, 177 insertions(+), 61 deletions(-) diff --git a/doc/guides/nics/features/nfp.ini b/doc/guides/nics/features/nfp.ini index d2899e7fb..70297b090 100644 --- a/doc/guides/nics/features/nfp.ini +++ b/doc/guides/nics/features/nfp.ini @@ -24,5 +24,6 @@ Basic stats = Y Stats per queue = Y Linux UIO= Y Linux VFIO = Y +Multiprocess aware = Y x86-64 = Y Usage doc= Y diff --git a/doc/guides/nics/features/nfp_vf.ini b/doc/guides/nics/features/nfp_vf.ini index d2899e7fb..70297b090 100644 --- a/doc/guides/nics/features/nfp_vf.ini +++ b/doc/guides/nics/features/nfp_vf.ini @@ -24,5 +24,6 @@ Basic stats = Y Stats per queue = Y Linux UIO= Y Linux VFIO = Y +Multiprocess aware = Y x86-64 = Y Usage doc= Y diff --git a/drivers/net/nfp/nfp_net.c b/drivers/net/nfp/nfp_net.c index 54c6da924..ffef97d80 100644 --- a/drivers/net/nfp/nfp_net.c +++ b/drivers/net/nfp/nfp_net.c @@ -766,9 +766,12 @@ nfp_net_start(struct rte_eth_dev *dev) goto error; } - if (hw->is_pf) + if (hw->is_pf && rte_eal_process_type() == RTE_PROC_PRIMARY) /* Configure the physical port up */ nfp_eth_set_configured(hw->cpp, hw->pf_port_idx, 1); + else + nfp_eth_set_configured(dev->process_private, + hw->pf_port_idx, 1); hw->ctrl = new_ctrl; @@ -817,9 +820,12 @@ nfp_net_stop(struct rte_eth_dev *dev) (struct nfp_net_rxq *)dev->data->rx_queues[i]); } - if (hw->is_pf) + if (hw->is_pf && rte_eal_process_type() == RTE_PROC_PRIMARY) /* Configure the physical port down */ nfp_eth_set_configured(hw->cpp, hw->pf_port_idx, 0); + else + nfp_eth_set_configured(dev->process_private, + hw->pf_port_idx, 0); } /* Reset and stop device. The device can not be restarted. */ @@ -2918,16 +2924,16 @@ nfp_net_init(struct rte_eth_dev *eth_dev) hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2], hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]); - /* Registering LSC interrupt handler */ - rte_intr_callback_register(&pci_dev->intr_handle, - nfp_net_dev_interrupt_handler, - (void *)eth_dev); - - /* Telling the firmware about the LSC interrupt entry */ - nn_cfg_writeb(hw, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX); - - /* Recording current stats counters values */ - nfp_net_stats_reset(eth_dev); + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + /* Registering LSC interrupt handler */ + rte_intr_callback_register(&pci_dev->intr_handle, + nfp_net_dev_interrupt_handler, + (void *)eth_dev); + /* Telling the firmware about the LSC interrupt entry */ + nn_cfg_writeb(hw, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX); + /* Recording current stats counters values */ + nfp_net_stats_reset(eth_dev); + } return 0; @@ -2947,7 +2953,7 @@ nfp_pf_create_dev(struct rte_pci_device *dev, int port, int ports, struct rte_eth_dev *eth_dev; struct nfp_net_hw *hw; char *port_name; - int ret; + int retval; port_name = rte_zmalloc("nfp_pf_port_name", 100, 0); if (!port_name) @@ -2958,51 +2964,76 @@ nfp_pf_create_dev(struct rte_pci_device *dev, int port, int ports, else sprintf(port_name, "%s", dev->device.name); - eth_dev = rte_eth_dev_allocate(port_name); - if (!eth_dev) - return -ENOMEM; - if (port == 0) { - *priv = rte_zmalloc(port_name, - sizeof(struct nfp_net_adapter) * ports, - RTE_CACHE_LINE_SIZE); - if (!*priv) { - rte_eth_dev_release_port(eth_dev); - return -ENOMEM; + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + eth_dev = rte_eth_dev_allocate(port_name); + if (!eth_dev) { + rte_free(port_name); + return -ENODEV;
Re: [dpdk-dev] Question about telemetry on 18.11 release
Hello Rami, Thanks for your response. Please see inline. > Hi, Hideyuki, > > >Rami, thanks for your advice. > >If I understand you correctly, then > >there already exist APIs to collect statistic > >information inside dpdk including CPU usage. > > Yup. I want to also note that the librte jobstats is very veteran, it > exists in DPDK versions prior to 16.04 (like in 2.2.0; see: > https://github.com/DPDK/dpdk/tree/v2.2.0/lib/librte_jobstats ) > The telemetry library is relatively new. > > Following are my answers to your new queries (Q5-Q7): > > >Q5.Are there any API document for jobstats? > AFAIK, there is no API document apart from what I already sent (namely > the sample guide and > the API link, https://doc.dpdk.org/api/rte__jobstats_8h_source.html) [Hideyuki] Are there any reason why accompanied document does NOT exit? (if you know the reason why) > >Q6. Is it possible to use those (jobstats and telemetry) together? > It could be, never tried it though. > > >Q7.Are there any samples implementing above? > > Assuming you mean implementing both APIs together: No, AFAIK. It will > be great if someone will write documentation and implement such a > thing, if it is doable. [Hideyuki] Is it possible to contribute such a sample application by my side? (Just idea, I need permission from my boss though..) In that case, is it correct that what I should do is create/verify my contribution and post patches to dev list? Or are there any steps(procedures) to add such new sample? Q8. BTW, when I read jobstats sample, I felt that application programmers should be fully aware of the jobstats from the first place of software design (many timers in main function) and it is NOT "opt-in" "opt-out" feature. When considering many NFV applications are already there, I think it is more preferable that such statistics features can be "opt-in" or "opt-out". Is it possible to realize above using current DPDK framework? Thanks again. BR, Hideyuki Yamashita NTT TechnoCross > Regards, > Rami Rosen
Re: [dpdk-dev] [PATCH 1/2] devtools: report the incorrect section when complaining
On Wed, Nov 28, 2018 at 11:28:52AM +0100, David Marchand wrote: > It does not hurt reporting the incriminated section. > > Before: > ERROR: symbol rte_meter_trtcm_rfc4115_color_aware_check is added in a > section other than the EXPERIMENTAL section of the version map > > After: > ERROR: symbol rte_meter_trtcm_rfc4115_color_aware_check is added in > +EXPERIMENTAL section other than the EXPERIMENTAL section of the > version map > nit: Its a bit odd in the changelog to have an example in which the incorect section being reported matches the expected section. I.e. its confusing to read "... is added in +EXPERIMENTAL section other than the EXPERIMENTAL section". Might be better to change the language of the report below and the example to be something like: ERROR: symbol is added in the section, but is expected to be added in the EXPERIMENTAL section ACK to the notion of reporting the offending section though. Thats a good idea. Neil > Signed-off-by: David Marchand > --- > > Used http://patchwork.dpdk.org/patch/48354/ to test. > > --- > devtools/check-symbol-change.sh | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/devtools/check-symbol-change.sh b/devtools/check-symbol-change.sh > index 1d21e91..66741be 100755 > --- a/devtools/check-symbol-change.sh > +++ b/devtools/check-symbol-change.sh > @@ -115,7 +115,7 @@ check_for_rule_violations() > if [ $? -ne 0 ] > then > echo -n "ERROR: symbol $symname " > - echo -n "is added in a section " > + echo -n "is added in $secname section " > echo -n "other than the EXPERIMENTAL " > echo "section of the version map" > ret=1 > -- > 1.8.3.1 > >
Re: [dpdk-dev] [PATCH 3/3] lib/librte_meter: update abi to include new rfc4115 function
On 28 Nov 2018, at 11:09, Thomas Monjalon wrote: 28/11/2018 10:27, Eelco Chaudron: On 28 Nov 2018, at 9:38, David Marchand wrote: On Tue, Nov 27, 2018 at 4:22 PM Eelco Chaudron wrote: --- a/lib/librte_meter/Makefile +++ b/lib/librte_meter/Makefile -LIBABIVER := 2 +LIBABIVER := 3 As far as I understood the policy around the EXPERIMENTAL section, you don't need to bump the library version. Thought I would add the new function as none experimental, i.e. next version, but checkpatch did not allow me to do this. Tried to find info on what the right process was, as these functions are just another meter implementation using similar existing APIs. If anyone has any background on this please point me to it. It is documented here: http://doc.dpdk.org/guides/contributing/versioning.html The case for "similar API" is not handled specifically so far. So you need to introduce it as experimental. Thanks for the clarification, will update the APIs with __rte_experimental in the next iteration. I changed the library version as an existing data structure changed (which in theory should not change the location of the data), but the ABI check popped warnings so I decided to increase the version. It deserves to analyze why the ABI check raises a warning. If it really needs to bump the ABI version, you should justify it in the commit message, and explain what changed in the ABI section of the release notes, plus update the version in the release notes. Will look at it more closely and update it for the next iteration.
Re: [dpdk-dev] [PATCH] version: 19.02-rc0
28/11/2018 12:16, Ferruh Yigit: > On 11/28/2018 10:44 AM, Thomas Monjalon wrote: > > Start version numbering for a new release cycle, > > and introduce a template file for release notes. > > > > The release notes comments are updated to mandate > > a scope label for API and ABI changes. > > > > Signed-off-by: Thomas Monjalon > > --- > > doc/guides/rel_notes/index.rst | 1 + > > doc/guides/rel_notes/release_19_02.rst | 209 > > What do you think about storing the release notes template in git repo, this > helps creating new ones for next release also makes easier to track/view what > has been changed in the release notes template? If something is changed in the release notes, it should be updated in the template, with a risk of forgetting. And the list of libraries would need to be updated too. I feel it would be more complications.
Re: [dpdk-dev] [PATCH 3/3] lib/librte_meter: update abi to include new rfc4115 function
28/11/2018 13:40, Eelco Chaudron: > > On 28 Nov 2018, at 11:09, Thomas Monjalon wrote: > > > 28/11/2018 10:27, Eelco Chaudron: > >> On 28 Nov 2018, at 9:38, David Marchand wrote: > >>> On Tue, Nov 27, 2018 at 4:22 PM Eelco Chaudron > >>> wrote: > --- a/lib/librte_meter/Makefile > +++ b/lib/librte_meter/Makefile > -LIBABIVER := 2 > +LIBABIVER := 3 > >>> > >>> As far as I understood the policy around the EXPERIMENTAL section, > >>> you > >>> don't need to bump the library version. > >> > >> Thought I would add the new function as none experimental, i.e. next > >> version, but checkpatch did not allow me to do this. > >> > >> Tried to find info on what the right process was, as these functions > >> are > >> just another meter implementation using similar existing APIs. If > >> anyone > >> has any background on this please point me to it. > > > > It is documented here: > > http://doc.dpdk.org/guides/contributing/versioning.html > > > > The case for "similar API" is not handled specifically so far. > > So you need to introduce it as experimental. > > Thanks for the clarification, will update the APIs with > __rte_experimental in the next iteration. You should also add this on top of the doxygen comment: * @warning * @b EXPERIMENTAL: this API may change without prior notice
Re: [dpdk-dev] [PATCH 1/2] devtools: report the incorrect section when complaining
On Wed, Nov 28, 2018 at 1:35 PM Neil Horman wrote: > On Wed, Nov 28, 2018 at 11:28:52AM +0100, David Marchand wrote: > > It does not hurt reporting the incriminated section. > > > > Before: > > ERROR: symbol rte_meter_trtcm_rfc4115_color_aware_check is added in a > > section other than the EXPERIMENTAL section of the version map > > > > After: > > ERROR: symbol rte_meter_trtcm_rfc4115_color_aware_check is added in > > +EXPERIMENTAL section other than the EXPERIMENTAL section of the > > version map > > > nit: Its a bit odd in the changelog to have an example in which the > incorect > section being reported matches the expected section. I.e. its confusing > to read > "... is added in +EXPERIMENTAL section other than the EXPERIMENTAL > section". > Might be better to change the language of the report below and the example > to be > something like: > > ERROR: symbol is added in the section, but is expected > to be > added in the EXPERIMENTAL section > > ACK to the notion of reporting the offending section though. Thats a good > idea. > Ok, updated for v2. -- David Marchand
Re: [dpdk-dev] [PATCH v2 3/3] net/af_packet: get 'framesz' from the iface's MTU
On 27/11/2018 17:43, Ferruh Yigit wrote: > On 11/20/2018 10:26 AM, Tiago Lam wrote: >> Use the underlying MTU to calculate the framsize to be used for the mmap >> RINGs. This is to make it more flexible on deployments with different >> MTU requirements, instead of using a pre-defined value of 2048B. > > This behavior change should be documented in af_packet documentation which is > missing unfortunately. > Would you able to introduce the initial/basic af_packet doc to at least to > document device argument? If not please let me know, I can work on it. > Thanks for the review, Ferruh. Yeah, I don't mind cooking something up and submitting here for review; I'll wait a couple of days for a reply from John W. before proceeding, though. But given there's no documentation for af_packet yet, do you prefer to wait for that to be available, and apply it all together? Or could that be applied later as part of another patch? Tiago.
Re: [dpdk-dev] [PATCH 3/3] lib/librte_meter: update abi to include new rfc4115 function
On 28 Nov 2018, at 13:51, Thomas Monjalon wrote: > 28/11/2018 13:40, Eelco Chaudron: >> >> On 28 Nov 2018, at 11:09, Thomas Monjalon wrote: >> >>> 28/11/2018 10:27, Eelco Chaudron: On 28 Nov 2018, at 9:38, David Marchand wrote: > On Tue, Nov 27, 2018 at 4:22 PM Eelco Chaudron > wrote: >> --- a/lib/librte_meter/Makefile >> +++ b/lib/librte_meter/Makefile >> -LIBABIVER := 2 >> +LIBABIVER := 3 > > As far as I understood the policy around the EXPERIMENTAL section, > you > don't need to bump the library version. Thought I would add the new function as none experimental, i.e. next version, but checkpatch did not allow me to do this. Tried to find info on what the right process was, as these functions are just another meter implementation using similar existing APIs. If anyone has any background on this please point me to it. >>> >>> It is documented here: >>> http://doc.dpdk.org/guides/contributing/versioning.html >>> >>> The case for "similar API" is not handled specifically so far. >>> So you need to introduce it as experimental. >> >> Thanks for the clarification, will update the APIs with >> __rte_experimental in the next iteration. > > You should also add this on top of the doxygen comment: > > * @warning > * @b EXPERIMENTAL: this API may change without prior notice Thanks, done!
Re: [dpdk-dev] [PATCH] version: 19.02-rc0
> -Original Message- > From: Thomas Monjalon [mailto:tho...@monjalon.net] > Sent: Wednesday, November 28, 2018 10:45 AM > To: dev@dpdk.org > Cc: Mcnamara, John ; Kovacevic, Marko > > Subject: [PATCH] version: 19.02-rc0 Acked-by: John McNamara
Re: [dpdk-dev] [PATCH] version: 19.02-rc0
On 11/28/2018 12:45 PM, Thomas Monjalon wrote: > 28/11/2018 12:16, Ferruh Yigit: >> On 11/28/2018 10:44 AM, Thomas Monjalon wrote: >>> Start version numbering for a new release cycle, >>> and introduce a template file for release notes. >>> >>> The release notes comments are updated to mandate >>> a scope label for API and ABI changes. >>> >>> Signed-off-by: Thomas Monjalon >>> --- >>> doc/guides/rel_notes/index.rst | 1 + >>> doc/guides/rel_notes/release_19_02.rst | 209 >> >> What do you think about storing the release notes template in git repo, this >> helps creating new ones for next release also makes easier to track/view what >> has been changed in the release notes template? > > If something is changed in the release notes, it should be updated > in the template, with a risk of forgetting. > And the list of libraries would need to be updated too. > I feel it would be more complications. OK, I see the concern. What about doing rc0 release notes in two patches: 1- Copy-paste from previous release notes, strip the context. 2- Update the template as desired. Overall I think this is good to have, if you think this is just extra overhead, forget about it. Thanks, ferruh
Re: [dpdk-dev] [PATCH v2 3/3] net/af_packet: get 'framesz' from the iface's MTU
On 11/28/2018 1:12 PM, Lam, Tiago wrote: > On 27/11/2018 17:43, Ferruh Yigit wrote: >> On 11/20/2018 10:26 AM, Tiago Lam wrote: >>> Use the underlying MTU to calculate the framsize to be used for the mmap >>> RINGs. This is to make it more flexible on deployments with different >>> MTU requirements, instead of using a pre-defined value of 2048B. >> >> This behavior change should be documented in af_packet documentation which is >> missing unfortunately. >> Would you able to introduce the initial/basic af_packet doc to at least to >> document device argument? If not please let me know, I can work on it. >> > > Thanks for the review, Ferruh. > > Yeah, I don't mind cooking something up and submitting here for review; > I'll wait a couple of days for a reply from John W. before proceeding, > though. Thanks, appreciated. Agreed to wait a little. > > But given there's no documentation for af_packet yet, do you prefer to > wait for that to be available, and apply it all together? Or could that > be applied later as part of another patch? Both are OK, depends on your availability. I think it is better, to show the history, first patch as the documentation patch for existing behavior and your patch updating framsz usage (3/3) to update that document as well. Thanks, ferruh
Re: [dpdk-dev] [PATCH] version: 19.02-rc0
28/11/2018 14:24, Ferruh Yigit: > On 11/28/2018 12:45 PM, Thomas Monjalon wrote: > > 28/11/2018 12:16, Ferruh Yigit: > >> On 11/28/2018 10:44 AM, Thomas Monjalon wrote: > >>> Start version numbering for a new release cycle, > >>> and introduce a template file for release notes. > >>> > >>> The release notes comments are updated to mandate > >>> a scope label for API and ABI changes. > >>> > >>> Signed-off-by: Thomas Monjalon > >>> --- > >>> doc/guides/rel_notes/index.rst | 1 + > >>> doc/guides/rel_notes/release_19_02.rst | 209 > >> > >> What do you think about storing the release notes template in git repo, > >> this > >> helps creating new ones for next release also makes easier to track/view > >> what > >> has been changed in the release notes template? > > > > If something is changed in the release notes, it should be updated > > in the template, with a risk of forgetting. > > And the list of libraries would need to be updated too. > > I feel it would be more complications. > > OK, I see the concern. > > What about doing rc0 release notes in two patches: > 1- Copy-paste from previous release notes, strip the context. It is not only strip the context, I change version number and add back the removed section. > 2- Update the template as desired. > > Overall I think this is good to have, if you think this is just extra > overhead, > forget about it. I understand you want to track the changes done in the template. I will send a v2 with changes split.
Re: [dpdk-dev] [PATCH] examples/bond: wait for slaves to become active
Hi On 11/28/2018 11:08 AM, Ferruh Yigit wrote: On 11/14/2018 12:19 PM, Radu Nicolau wrote: Do not start the packet processing threads until all configured slaves become active. Hi Radu, What happens if packet processing threads started before all slaves active? Exit app, error, crash? So can we say this patch is fixing packet forwarding? (fix in title?) And do we know what break it, why this was not required previously but required now? (Fixes line ?) From what I see, the problem was always there: bond_ethdev_rx_burst will cycle through slaves, but if called more times with no active slaves the active slave index will point out of bounds, resulting in a segfault. While this may require a better fix, this patch is an improvement even if that fix comes - the configured slaves needs to be checked, and if none became active there is no point going further. in bond_ethdev_rx_burst: slave_count = internals->active_slave_count; ... if (++internals->active_slave == slave_count) internals->active_slave = 0; slave_count is zero, the if() will never be true and active_slave will be continuously incremented. It was not written to work with no active slaves. Thanks, ferruh Signed-off-by: Radu Nicolau --- examples/bond/main.c | 15 +++ 1 file changed, 15 insertions(+) diff --git a/examples/bond/main.c b/examples/bond/main.c index b282e68..6623cae 100644 --- a/examples/bond/main.c +++ b/examples/bond/main.c @@ -220,6 +220,7 @@ bond_port_init(struct rte_mempool *mbuf_pool) struct rte_eth_rxconf rxq_conf; struct rte_eth_txconf txq_conf; struct rte_eth_conf local_port_conf = port_conf; + uint16_t wait_counter = 20; retval = rte_eth_bond_create("net_bonding0", BONDING_MODE_ALB, 0 /*SOCKET_ID_ANY*/); @@ -274,6 +275,20 @@ bond_port_init(struct rte_mempool *mbuf_pool) if (retval < 0) rte_exit(retval, "Start port %d failed (res=%d)", BOND_PORT, retval); + printf("Waiting for slaves to become active..."); + while (wait_counter) { + uint16_t act_slaves[16] = {0}; + if (rte_eth_bond_active_slaves_get(BOND_PORT, act_slaves, 16) == + slaves_count) { + printf("\n"); + break; + } + sleep(1); + printf("..."); + if (--wait_counter == 0) + rte_exit(-1, "\nFailed to activate slaves\n"); + } + rte_eth_promiscuous_enable(BOND_PORT); struct ether_addr addr;
[dpdk-dev] [PATCH 2/2] sched: fix possible mem leak on initialize
In some case, we may create sched port dynamically, if err when creating so memory will leak. Signed-off-by: Tonghao Zhang --- lib/librte_sched/rte_sched.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c index 17de6e6..a3adcca 100644 --- a/lib/librte_sched/rte_sched.c +++ b/lib/librte_sched/rte_sched.c @@ -724,6 +724,7 @@ struct rte_sched_port * bmp_mem_size); if (port->bmp == NULL) { RTE_LOG(ERR, SCHED, "Bitmap init error\n"); + rte_free(port); return NULL; } -- 1.8.3.1
[dpdk-dev] [PATCH 1/2] sched: refine get base helper function
use switch instead of if, and it is more easy reading. Signed-off-by: Tonghao Zhang --- lib/librte_sched/rte_sched.c | 40 +++- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c index 587d5e6..17de6e6 100644 --- a/lib/librte_sched/rte_sched.c +++ b/lib/librte_sched/rte_sched.c @@ -385,7 +385,7 @@ enum rte_sched_port_array { uint32_t n_subports_per_port = params->n_subports_per_port; uint32_t n_pipes_per_subport = params->n_pipes_per_subport; uint32_t n_pipes_per_port = n_pipes_per_subport * n_subports_per_port; - uint32_t n_queues_per_port = RTE_SCHED_QUEUES_PER_PIPE * n_pipes_per_subport * n_subports_per_port; + uint32_t n_queues_per_port = RTE_SCHED_QUEUES_PER_PIPE * n_pipes_per_port; uint32_t size_subport = n_subports_per_port * sizeof(struct rte_sched_subport); uint32_t size_pipe = n_pipes_per_port * sizeof(struct rte_sched_pipe); @@ -407,35 +407,33 @@ enum rte_sched_port_array { size_queue_array = n_pipes_per_port * size_per_pipe_queue_array; base = 0; + switch (array) { + case e_RTE_SCHED_PORT_ARRAY_TOTAL: + base += RTE_CACHE_LINE_ROUNDUP(size_queue_array); - if (array == e_RTE_SCHED_PORT_ARRAY_SUBPORT) - return base; - base += RTE_CACHE_LINE_ROUNDUP(size_subport); + case e_RTE_SCHED_PORT_ARRAY_QUEUE_ARRAY: + base += RTE_CACHE_LINE_ROUNDUP(size_bmp_array); - if (array == e_RTE_SCHED_PORT_ARRAY_PIPE) - return base; - base += RTE_CACHE_LINE_ROUNDUP(size_pipe); + case e_RTE_SCHED_PORT_ARRAY_BMP_ARRAY: + base += RTE_CACHE_LINE_ROUNDUP(size_pipe_profiles); - if (array == e_RTE_SCHED_PORT_ARRAY_QUEUE) - return base; - base += RTE_CACHE_LINE_ROUNDUP(size_queue); + case e_RTE_SCHED_PORT_ARRAY_PIPE_PROFILES: + base += RTE_CACHE_LINE_ROUNDUP(size_queue_extra); - if (array == e_RTE_SCHED_PORT_ARRAY_QUEUE_EXTRA) - return base; - base += RTE_CACHE_LINE_ROUNDUP(size_queue_extra); + case e_RTE_SCHED_PORT_ARRAY_QUEUE_EXTRA: + base += RTE_CACHE_LINE_ROUNDUP(size_queue); - if (array == e_RTE_SCHED_PORT_ARRAY_PIPE_PROFILES) - return base; - base += RTE_CACHE_LINE_ROUNDUP(size_pipe_profiles); + case e_RTE_SCHED_PORT_ARRAY_QUEUE: + base += RTE_CACHE_LINE_ROUNDUP(size_pipe); - if (array == e_RTE_SCHED_PORT_ARRAY_BMP_ARRAY) - return base; - base += RTE_CACHE_LINE_ROUNDUP(size_bmp_array); + case e_RTE_SCHED_PORT_ARRAY_PIPE: + base += RTE_CACHE_LINE_ROUNDUP(size_subport); - if (array == e_RTE_SCHED_PORT_ARRAY_QUEUE_ARRAY) + case e_RTE_SCHED_PORT_ARRAY_SUBPORT: return base; - base += RTE_CACHE_LINE_ROUNDUP(size_queue_array); + } + RTE_LOG(DEBUG, SCHED, "Should not reach here. \n"); return base; } -- 1.8.3.1
Re: [dpdk-dev] [PATCH] examples/bond: wait for slaves to become active
On 11/28/2018 08:48 AM, Radu Nicolau wrote: Hi On 11/28/2018 11:08 AM, Ferruh Yigit wrote: On 11/14/2018 12:19 PM, Radu Nicolau wrote: Do not start the packet processing threads until all configured slaves become active. Hi Radu, What happens if packet processing threads started before all slaves active? Exit app, error, crash? So can we say this patch is fixing packet forwarding? (fix in title?) And do we know what break it, why this was not required previously but required now? (Fixes line ?) From what I see, the problem was always there: bond_ethdev_rx_burst will cycle through slaves, but if called more times with no active slaves the active slave index will point out of bounds, resulting in a segfault. While this may require a better fix, this patch is an improvement even if that fix comes - the configured slaves needs to be checked, and if none became active there is no point going further. in bond_ethdev_rx_burst: slave_count = internals->active_slave_count; ... if (++internals->active_slave == slave_count) internals->active_slave = 0; slave_count is zero, the if() will never be true and active_slave will be continuously incremented. It was not written to work with no active slaves. Just create another patch for the rx routines. If the active_slave_count is 0, there's nothing to do really. It should just return and not bother with any of the other work. Thanks, ferruh Signed-off-by: Radu Nicolau --- examples/bond/main.c | 15 +++ 1 file changed, 15 insertions(+) diff --git a/examples/bond/main.c b/examples/bond/main.c index b282e68..6623cae 100644 --- a/examples/bond/main.c +++ b/examples/bond/main.c @@ -220,6 +220,7 @@ bond_port_init(struct rte_mempool *mbuf_pool) struct rte_eth_rxconf rxq_conf; struct rte_eth_txconf txq_conf; struct rte_eth_conf local_port_conf = port_conf; +uint16_t wait_counter = 20; retval = rte_eth_bond_create("net_bonding0", BONDING_MODE_ALB, 0 /*SOCKET_ID_ANY*/); @@ -274,6 +275,20 @@ bond_port_init(struct rte_mempool *mbuf_pool) if (retval < 0) rte_exit(retval, "Start port %d failed (res=%d)", BOND_PORT, retval); +printf("Waiting for slaves to become active..."); +while (wait_counter) { +uint16_t act_slaves[16] = {0}; +if (rte_eth_bond_active_slaves_get(BOND_PORT, act_slaves, 16) == +slaves_count) { +printf("\n"); +break; +} +sleep(1); +printf("..."); +if (--wait_counter == 0) +rte_exit(-1, "\nFailed to activate slaves\n"); +} + rte_eth_promiscuous_enable(BOND_PORT); struct ether_addr addr;
[dpdk-dev] [PATCH v2 2/2] doc: improve release notes template
Some comments are added to encourage classifying API and ABI changes with scope labels. The section "removed items" is moved just after the "new features". The sample for shared library versions is replaced with foo/bar names. Signed-off-by: Thomas Monjalon Reviewed-by: Ferruh Yigit Acked-by: John McNamara --- doc/guides/rel_notes/release_19_02.rst | 67 ++ 1 file changed, 35 insertions(+), 32 deletions(-) diff --git a/doc/guides/rel_notes/release_19_02.rst b/doc/guides/rel_notes/release_19_02.rst index cbb2ddb78..a94fa86a7 100644 --- a/doc/guides/rel_notes/release_19_02.rst +++ b/doc/guides/rel_notes/release_19_02.rst @@ -55,35 +55,6 @@ New Features = -API Changes - -.. This section should contain API changes. Sample format: - - * Add a short 1-2 sentence description of the API change. - Use fixed width quotes for ``function_names`` or ``struct_names``. - Use the past tense. - - This section is a comment. Do not overwrite or remove it. - Also, make sure to start the actual text at the margin. - = - - -ABI Changes - -.. This section should contain ABI changes. Sample format: - - * Add a short 1-2 sentence description of the ABI change - that was announced in the previous releases and made in this release. - Use fixed width quotes for ``function_names`` or ``struct_names``. - Use the past tense. - - This section is a comment. Do not overwrite or remove it. - Also, make sure to start the actual text at the margin. - = - - Removed Items - @@ -97,15 +68,47 @@ Removed Items = +API Changes +--- + +.. This section should contain API changes. Sample format: + + * sample: Add a short 1-2 sentence description of the API change + which was announced in the previous releases and made in this release. + Start with a scope label like "ethdev:". + Use fixed width quotes for ``function_names`` or ``struct_names``. + Use the past tense. + + This section is a comment. Do not overwrite or remove it. + Also, make sure to start the actual text at the margin. + = + + +ABI Changes +--- + +.. This section should contain ABI changes. Sample format: + + * sample: Add a short 1-2 sentence description of the ABI change + which was announced in the previous releases and made in this release. + Start with a scope label like "ethdev:". + Use fixed width quotes for ``function_names`` or ``struct_names``. + Use the past tense. + + This section is a comment. Do not overwrite or remove it. + Also, make sure to start the actual text at the margin. + = + + Shared Library Versions --- .. Update any library version updated in this release and prepend with a ``+`` sign, like this: - librte_acl.so.2 - + librte_cfgfile.so.2 - librte_cmdline.so.2 + libfoo.so.1 + + libupdated.so.2 + libbar.so.1 This section is a comment. Do not overwrite or remove it. = -- 2.19.0
[dpdk-dev] [PATCH v2 1/2] version: 19.02-rc0
Start version numbering for a new release cycle, and introduce a template file for release notes. The release notes comments are updated to mandate a scope label for API and ABI changes. Signed-off-by: Thomas Monjalon Reviewed-by: Ferruh Yigit Acked-by: John McNamara --- doc/guides/rel_notes/index.rst | 1 + doc/guides/rel_notes/release_19_02.rst | 207 lib/librte_eal/common/include/rte_version.h | 8 +- meson.build | 2 +- 4 files changed, 213 insertions(+), 5 deletions(-) create mode 100644 doc/guides/rel_notes/release_19_02.rst diff --git a/doc/guides/rel_notes/index.rst b/doc/guides/rel_notes/index.rst index 1243e985c..ccfd38bcf 100644 --- a/doc/guides/rel_notes/index.rst +++ b/doc/guides/rel_notes/index.rst @@ -8,6 +8,7 @@ Release Notes :maxdepth: 1 :numbered: +release_19_02 release_18_11 release_18_08 release_18_05 diff --git a/doc/guides/rel_notes/release_19_02.rst b/doc/guides/rel_notes/release_19_02.rst new file mode 100644 index 0..cbb2ddb78 --- /dev/null +++ b/doc/guides/rel_notes/release_19_02.rst @@ -0,0 +1,207 @@ +.. SPDX-License-Identifier: BSD-3-Clause +Copyright 2018 The DPDK contributors + +DPDK Release 19.02 +== + +.. **Read this first.** + + The text in the sections below explains how to update the release notes. + + Use proper spelling, capitalization and punctuation in all sections. + + Variable and config names should be quoted as fixed width text: + ``LIKE_THIS``. + + Build the docs and view the output file to ensure the changes are correct:: + + make doc-guides-html + + xdg-open build/doc/html/guides/rel_notes/release_19_02.html + + +New Features + + +.. This section should contain new features added in this release. + Sample format: + + * **Add a title in the past tense with a full stop.** + + Add a short 1-2 sentence description in the past tense. + The description should be enough to allow someone scanning + the release notes to understand the new feature. + + If the feature adds a lot of sub-features you can use a bullet list + like this: + + * Added feature foo to do something. + * Enhanced feature bar to do something else. + + Refer to the previous release notes for examples. + + Suggested order in release notes items: + * Core libs (EAL, mempool, ring, mbuf, buses) + * Device abstraction libs and PMDs + - ethdev (lib, PMDs) + - cryptodev (lib, PMDs) + - eventdev (lib, PMDs) + - etc + * Other libs + * Apps, Examples, Tools (if significant) + + This section is a comment. Do not overwrite or remove it. + Also, make sure to start the actual text at the margin. + = + + +API Changes +--- + +.. This section should contain API changes. Sample format: + + * Add a short 1-2 sentence description of the API change. + Use fixed width quotes for ``function_names`` or ``struct_names``. + Use the past tense. + + This section is a comment. Do not overwrite or remove it. + Also, make sure to start the actual text at the margin. + = + + +ABI Changes +--- + +.. This section should contain ABI changes. Sample format: + + * Add a short 1-2 sentence description of the ABI change + that was announced in the previous releases and made in this release. + Use fixed width quotes for ``function_names`` or ``struct_names``. + Use the past tense. + + This section is a comment. Do not overwrite or remove it. + Also, make sure to start the actual text at the margin. + = + + +Removed Items +- + +.. This section should contain removed items in this release. Sample format: + + * Add a short 1-2 sentence description of the removed item + in the past tense. + + This section is a comment. Do not overwrite or remove it. + Also, make sure to start the actual text at the margin. + = + + +Shared Library Versions +--- + +.. Update any library version updated in this release + and prepend with a ``+`` sign, like this: + + librte_acl.so.2 + + librte_cfgfile.so.2 + librte_cmdline.so.2 + + This section is a comment. Do not overwrite or remove it. + = + +The libraries prepended with a plus sign were incremented in this version. + +.. code-block:: diff + + librte_acl.so.2 + librte_bbdev.so.1 + librte_bitratestats.so.2 + librte_bpf.so.1 + librte_bus_dpaa.so.2 + librte_bus_fslmc.so.2 + librte_bus_ifpga.so.2 + librte_bus_pci.so.2 + librte_bus_vdev.so.2 + librte_bus_vmbus.so.2 + librte_cfgfile.so.2 + librte_cmdline.so.2 + librt
Re: [dpdk-dev] [dpdk-users] IPV4/IPV6 TCP/UDP Pseudo Header Checksum APIs
On Tue, Oct 23, 2018 at 11:01:58AM +0200, Olivier Matz wrote: > Hi, > > You are right, the current code does not take IP or IPv6 options > in account. I think this should be considered as a bug. > > The fix for IPv4 is not complicated, I did a quick draft here: > http://git.droids-corp.org/?p=dpdk.git;a=commitdiff;h=96a6978ef6814e1450e1bd65fbce91c3d85b3121 > > For IPv6, it is more complex than expected: > https://tools.ietf.org/html/rfc2460.html#section-8.1 > > - we need to skip extension headers > - we need to parse routing headers and use the proper destination > address in the pseudo header checksum > > This makes me think that the API is not adequate. Asking the user > to provide the headers in a contiguous memory without specifying > the length is quite dangerous, especially if the header comes from > outside, as it can trigger out of bound accesses. > > I wonder if we shouldn't switch to a mbuf based API instead, and > deprecate the old one. > > Thoughts? > Olivier > I have been looking into a similar issue because rte_net_intel_cksum_prepare(), which is used by most tx_pkt_prepare handlers, does not work when ipv6 extensions are present. That function is using rte_ipv6_phdr_cksum(). And this makes rte_eth_tx_prepare() kinda useless for any workloads that encounter ipv6 extensions. There are 2 routing header types now (2 and 3). https://www.iana.org/assignments/ipv6-parameters/ipv6-parameters.xhtml#ipv6-parameters-3 In addition to these routing headers, there is also ipv6 mobility. Pseudo header's source address is supposed to be the address in the Home Address option. https://tools.ietf.org/html/rfc6275#page-36 Who knows there may be future extensions that affect pseudo header.. We can probably make rte_ipv6_phdr_cksum() to understand all existing headers that affect pseudo header, but it will still not be future proof. Should at least document the limitations for rte_ipv6_phdr_cksum().. -Hyong
Re: [dpdk-dev] [RFC 2/3] tqs: add thread quiescent state library
> > > > Hi Honnappa, > Thank you for reviewing the patch, appreciate your comments. > > > > > > + > > > +/* Allocate a new TQS variable with the name *name* in memory. */ > > > +struct rte_tqs * __rte_experimental rte_tqs_alloc(const char *name, > > > +int socket_id, uint64_t lcore_mask) { > > > + char tqs_name[RTE_TQS_NAMESIZE]; > > > + struct rte_tailq_entry *te, *tmp_te; > > > + struct rte_tqs_list *tqs_list; > > > + struct rte_tqs *v, *tmp_v; > > > + int ret; > > > + > > > + if (name == NULL) { > > > + RTE_LOG(ERR, TQS, "Invalid input parameters\n"); > > > + rte_errno = -EINVAL; > > > + return NULL; > > > + } > > > + > > > + te = rte_zmalloc("TQS_TAILQ_ENTRY", sizeof(*te), 0); > > > + if (te == NULL) { > > > + RTE_LOG(ERR, TQS, "Cannot reserve memory for tailq\n"); > > > + rte_errno = -ENOMEM; > > > + return NULL; > > > + } > > > + > > > + snprintf(tqs_name, sizeof(tqs_name), "%s", name); > > > + v = rte_zmalloc_socket(tqs_name, sizeof(struct rte_tqs), > > > + RTE_CACHE_LINE_SIZE, socket_id); > > > + if (v == NULL) { > > > + RTE_LOG(ERR, TQS, "Cannot reserve memory for TQS > > variable\n"); > > > + rte_errno = -ENOMEM; > > > + goto alloc_error; > > > + } > > > + > > > + ret = snprintf(v->name, sizeof(v->name), "%s", name); > > > + if (ret < 0 || ret >= (int)sizeof(v->name)) { > > > + rte_errno = -ENAMETOOLONG; > > > + goto alloc_error; > > > + } > > > + > > > + te->data = (void *) v; > > > + v->lcore_mask = lcore_mask; > > > + > > > + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); > > > + > > > + tqs_list = RTE_TAILQ_CAST(rte_tqs_tailq.head, rte_tqs_list); > > > + > > > + /* Search if a TQS variable with the same name exists already */ > > > + TAILQ_FOREACH(tmp_te, tqs_list, next) { > > > + tmp_v = (struct rte_tqs *) tmp_te->data; > > > + if (strncmp(name, tmp_v->name, RTE_TQS_NAMESIZE) == 0) > > > + break; > > > + } > > > + > > > + if (tmp_te != NULL) { > > > + rte_errno = -EEXIST; > > > + goto tqs_exist; > > > + } > > > + > > > + TAILQ_INSERT_TAIL(tqs_list, te, next); > > > + > > > + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); > > > + > > > + return v; > > > + > > > +tqs_exist: > > > + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); > > > + > > > +alloc_error: > > > + rte_free(te); > > > + rte_free(v); > > > + return NULL; > > > +} > > > > That seems quite heavy-weight function just to allocate sync variable. > > As size of struct rte_tqs is constant and known to the user, might be > > better just > > provide rte_tqs_init(struct rte_tqs *tqs, ...) and let user allocate/free > > memory > > for it by himself. > > > I believe, when you say heavy-weight, you are referring to adding tqs > variable to the TAILQ and allocating the memory for it. Yes. > Agree. I also > do not expect that there are a whole lot of tqs variables used in an > application. Even in rte_tqs_free, there is similar overhead. > > The extra part is due to the way the TQS variable will get identified by data > plane threads. I am thinking that a data plane thread will use the > rte_tqs_lookup API to identify a TQS variable. However, it is possible to > share this with data plane threads via a simple shared structure as > well. > > Along with not allocating the memory, are you suggesting that we could skip > maintaining a list of TQS variables in the TAILQ? This will > remove rte_tqs_lookup, rte_tqs_free, rte_tqs_list_dump APIs. I am fine with > this approach. Yes, that's what I suggest. My thought was - it is just another data structure used for synchronization (as spinlock, rwlock, etc.). So should be possible to allocate it statically and we probably don't need to have an ability to lookup such variable by name via tailq. > > > > + > > > +/* Add a reader thread, running on an lcore, to the list of threads > > > + * reporting their quiescent state on a TQS variable. > > > + */ > > > +int __rte_experimental > > > +rte_tqs_register_lcore(struct rte_tqs *v, unsigned int lcore_id) { > > > + TQS_RETURN_IF_TRUE((v == NULL || lcore_id >= > > RTE_TQS_MAX_LCORE), > > > + -EINVAL); > > > > It is not very good practice to make function return different values and > > behave > > in a different way in debug/non-debug mode. > > I'd say that for slow-path (functions in .c) it is always good to check > > input > > parameters. > > For fast-path (functions in .h) we sometimes skip such checking, but debug > > mode can probably use RTE_ASSERT() or so. > Makes sense, I will change this in the next version. > > > > > > > lcore_id >= RTE_TQS_MAX_LCORE > > > > Is this limitation really necessary? > I added this limitation because currently DPDK application cannot take a mask > more than 64bit wide. Otherwise, this should be as big as > RTE_MAX_LCORE. > I see that in the case of '-lcores' option, the number of lcores can be more > than the number of PEs.
[dpdk-dev] [PATCH] ethdev: add function to print a flow
Flow contains the following information: port id, attributes, patterns and actions. The function rte_flow_print prints all above information. It can be used for debugging purposes to validate the behavior of different dpdk applications. Example: running testpmd with the following flow create: flow create 1 transfer ingress pattern eth src is 52:54:00:15:b1:b1 dst is 24:8A:07:8D:AE:C6 / end actions of_push_vlan ethertype 0x8100 / of_set_vlan_vid vlan_vid 0x888 / of_set_vlan_pcp vlan_pcp 7 / port_id id 0 / end Will result in this output: Print flow info port_id =1 group=0 priority =0 ingress =1 egress =0 transfer =1 group=0 reserved =0 pattern type =9 name=RTE_FLOW_ITEM_TYPE_ETH spec type=0x0, src=52:54:00:15:b1:b1, dst=24:8a:07:8d:ae:c6 mask type=0x0, src=ff:ff:ff:ff:ff:ff, dst=ff:ff:ff:ff:ff:ff actions type =23 name=RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN ethertype=0x81 actions type =24 name=RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID vlan_vid=0x8808 actions type =25 name=RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP vlan_pcp=7 actions type =13 name=RTE_FLOW_ACTION_TYPE_PORT_ID id=0 reserved=0 Signed-off-by: Asaf Penso --- lib/librte_ethdev/rte_ethdev_version.map |1 + lib/librte_ethdev/rte_flow.c | 226 ++ lib/librte_ethdev/rte_flow.h | 31 3 files changed, 258 insertions(+), 0 deletions(-) diff --git a/lib/librte_ethdev/rte_ethdev_version.map b/lib/librte_ethdev/rte_ethdev_version.map index 92ac3de..7676983 100644 --- a/lib/librte_ethdev/rte_ethdev_version.map +++ b/lib/librte_ethdev/rte_ethdev_version.map @@ -249,6 +249,7 @@ EXPERIMENTAL { rte_eth_switch_domain_free; rte_flow_conv; rte_flow_expand_rss; + rte_flow_print; rte_mtr_capabilities_get; rte_mtr_create; rte_mtr_destroy; diff --git a/lib/librte_ethdev/rte_flow.c b/lib/librte_ethdev/rte_flow.c index 3277be1..742d892 100644 --- a/lib/librte_ethdev/rte_flow.c +++ b/lib/librte_ethdev/rte_flow.c @@ -16,6 +16,8 @@ #include "rte_flow_driver.h" #include "rte_flow.h" +int rte_flow_logtype; + /** * Flow elements description tables. */ @@ -202,6 +204,222 @@ struct rte_flow_desc_data { NULL, rte_strerror(ENOSYS)); } +/* Example: + * + * struct eth_addr mac; + * [...] + * printf("The Ethernet address is "RTE_ETH_ADDR_FMT"\n", + * RTE_ETH_ADDR_ARGS(mac)); + * + */ +#define RTE_ETH_ADDR_FMT \ + "%02"PRIx8":%02"PRIx8":%02"PRIx8":%02"PRIx8":%02"PRIx8":%02"PRIx8 +#define RTE_ETH_ADDR_ARGS(addr) RTE_ETH_ADDR_BYTES_ARGS((addr).ea) +#define RTE_ETH_ADDR_BYTES_ARGS(bytes) \ + (bytes)[0], (bytes)[1], (bytes)[2], (bytes)[3], (bytes)[4], (bytes)[5] + +/* Print the flow fields. */ +void __rte_experimental +rte_flow_print(uint16_t port_id, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[]) +{ + RTE_FLOW_LOG(INFO, "Print flow info\n"); + RTE_FLOW_LOG(INFO, "port_id =%u\n", port_id); + RTE_FLOW_LOG(INFO, "group=%u\n", attr->group); + RTE_FLOW_LOG(INFO, "priority =%u\n", attr->priority); + RTE_FLOW_LOG(INFO, "ingress =%u\n", attr->ingress); + RTE_FLOW_LOG(INFO, "egress =%u\n", attr->egress); + RTE_FLOW_LOG(INFO, "transfer =%u\n", attr->transfer); + RTE_FLOW_LOG(INFO, "group=%u\n", attr->group); + RTE_FLOW_LOG(INFO, "reserved =%u\n", attr->reserved); + + for (; pattern->type != RTE_FLOW_ITEM_TYPE_END; pattern++) { + RTE_FLOW_LOG(INFO, "pattern type =%u\n", pattern->type); + switch (pattern->type) { + case RTE_FLOW_ITEM_TYPE_ETH: { + RTE_FLOW_LOG(INFO, " name=RTE_FLOW_ITEM_TYPE_ETH\n"); + if (pattern->spec) { + const struct rte_flow_item_eth *spec = + pattern->spec; + RTE_FLOW_LOG(INFO, " spec type=0x%x, " + "src="RTE_ETH_ADDR_FMT", dst="RTE_ETH_ADDR_FMT + "\n", + spec->type, + RTE_ETH_ADDR_BYTES_ARGS(spec->src.addr_bytes), + RTE_ETH_ADDR_BYTES_ARGS(spec->dst.addr_bytes)); + } + + if (pattern->mask) { + const struct rte_flow_item_eth *mask = + pattern->mask; + RTE_FLOW_LOG(INFO, " mask type=0x%x, " + "src="RTE_ETH_ADDR_FMT", dst="RTE_ETH_ADDR_FMT + "\n", + mask->type, + RTE_ETH_ADDR_BYTES_ARGS(mask->src.addr_bytes), + RTE_ETH_ADDR_BYTES_ARGS(mask->dst.addr_bytes)); +
Re: [dpdk-dev] [PATCH] examples/bond: wait for slaves to become active
On 11/28/2018 2:28 PM, Chas Williams wrote: On 11/28/2018 08:48 AM, Radu Nicolau wrote: Hi On 11/28/2018 11:08 AM, Ferruh Yigit wrote: On 11/14/2018 12:19 PM, Radu Nicolau wrote: Do not start the packet processing threads until all configured slaves become active. Hi Radu, What happens if packet processing threads started before all slaves active? Exit app, error, crash? So can we say this patch is fixing packet forwarding? (fix in title?) And do we know what break it, why this was not required previously but required now? (Fixes line ?) From what I see, the problem was always there: bond_ethdev_rx_burst will cycle through slaves, but if called more times with no active slaves the active slave index will point out of bounds, resulting in a segfault. While this may require a better fix, this patch is an improvement even if that fix comes - the configured slaves needs to be checked, and if none became active there is no point going further. in bond_ethdev_rx_burst: slave_count = internals->active_slave_count; ... if (++internals->active_slave == slave_count) internals->active_slave = 0; slave_count is zero, the if() will never be true and active_slave will be continuously incremented. It was not written to work with no active slaves. Just create another patch for the rx routines. If the active_slave_count is 0, there's nothing to do really. It should just return and not bother with any of the other work. I can do that, and it will be the better fix I mentioned. But I still think this patch makes the sample app better, at least it gives a hint to someone looking to develop its own app to check on the slaves' status before proceeding to rx. Thanks, ferruh Signed-off-by: Radu Nicolau --- examples/bond/main.c | 15 +++ 1 file changed, 15 insertions(+) diff --git a/examples/bond/main.c b/examples/bond/main.c index b282e68..6623cae 100644 --- a/examples/bond/main.c +++ b/examples/bond/main.c @@ -220,6 +220,7 @@ bond_port_init(struct rte_mempool *mbuf_pool) struct rte_eth_rxconf rxq_conf; struct rte_eth_txconf txq_conf; struct rte_eth_conf local_port_conf = port_conf; + uint16_t wait_counter = 20; retval = rte_eth_bond_create("net_bonding0", BONDING_MODE_ALB, 0 /*SOCKET_ID_ANY*/); @@ -274,6 +275,20 @@ bond_port_init(struct rte_mempool *mbuf_pool) if (retval < 0) rte_exit(retval, "Start port %d failed (res=%d)", BOND_PORT, retval); + printf("Waiting for slaves to become active..."); + while (wait_counter) { + uint16_t act_slaves[16] = {0}; + if (rte_eth_bond_active_slaves_get(BOND_PORT, act_slaves, 16) == + slaves_count) { + printf("\n"); + break; + } + sleep(1); + printf("..."); + if (--wait_counter == 0) + rte_exit(-1, "\nFailed to activate slaves\n"); + } + rte_eth_promiscuous_enable(BOND_PORT); struct ether_addr addr;
Re: [dpdk-dev] [PATCH] examples/bond: wait for slaves to become active
On 11/28/18 11:04 AM, Radu Nicolau wrote: On 11/28/2018 2:28 PM, Chas Williams wrote: On 11/28/2018 08:48 AM, Radu Nicolau wrote: Hi On 11/28/2018 11:08 AM, Ferruh Yigit wrote: On 11/14/2018 12:19 PM, Radu Nicolau wrote: Do not start the packet processing threads until all configured slaves become active. Hi Radu, What happens if packet processing threads started before all slaves active? Exit app, error, crash? So can we say this patch is fixing packet forwarding? (fix in title?) And do we know what break it, why this was not required previously but required now? (Fixes line ?) From what I see, the problem was always there: bond_ethdev_rx_burst will cycle through slaves, but if called more times with no active slaves the active slave index will point out of bounds, resulting in a segfault. While this may require a better fix, this patch is an improvement even if that fix comes - the configured slaves needs to be checked, and if none became active there is no point going further. in bond_ethdev_rx_burst: slave_count = internals->active_slave_count; ... if (++internals->active_slave == slave_count) internals->active_slave = 0; slave_count is zero, the if() will never be true and active_slave will be continuously incremented. It was not written to work with no active slaves. Just create another patch for the rx routines. If the active_slave_count is 0, there's nothing to do really. It should just return and not bother with any of the other work. I can do that, and it will be the better fix I mentioned. But I still think this patch makes the sample app better, at least it gives a hint to someone looking to develop its own app to check on the slaves' status before proceeding to rx. Yes, I agree this patch is still valid. If you are writing some sort of test, you should wait until the bonding interface and slaves are ready before you start sending traffic. Thanks, ferruh Signed-off-by: Radu Nicolau --- examples/bond/main.c | 15 +++ 1 file changed, 15 insertions(+) diff --git a/examples/bond/main.c b/examples/bond/main.c index b282e68..6623cae 100644 --- a/examples/bond/main.c +++ b/examples/bond/main.c @@ -220,6 +220,7 @@ bond_port_init(struct rte_mempool *mbuf_pool) struct rte_eth_rxconf rxq_conf; struct rte_eth_txconf txq_conf; struct rte_eth_conf local_port_conf = port_conf; + uint16_t wait_counter = 20; retval = rte_eth_bond_create("net_bonding0", BONDING_MODE_ALB, 0 /*SOCKET_ID_ANY*/); @@ -274,6 +275,20 @@ bond_port_init(struct rte_mempool *mbuf_pool) if (retval < 0) rte_exit(retval, "Start port %d failed (res=%d)", BOND_PORT, retval); + printf("Waiting for slaves to become active..."); + while (wait_counter) { + uint16_t act_slaves[16] = {0}; + if (rte_eth_bond_active_slaves_get(BOND_PORT, act_slaves, 16) == + slaves_count) { + printf("\n"); + break; + } + sleep(1); + printf("..."); + if (--wait_counter == 0) + rte_exit(-1, "\nFailed to activate slaves\n"); + } + rte_eth_promiscuous_enable(BOND_PORT); struct ether_addr addr;
Re: [dpdk-dev] Application used for DSW event_dev performance testing
On 2018-11-27 23:33, Venky Venkatesh wrote: As you can see the DSW overhead dominates the scene and very little real work is getting done. Is there some configuration or tuning to be done to get the sort of performance you are seeing with multiple cores? I can't explain the behavior you are seeing based on the information you have supplied. Attached is a small DSW throughput test program, that I thought might help you to find the issue. It works much like the pipeline simulator I used when developing the scheduler, but it's a lot simpler. Remember to supply "--vdev=event_dsw0". I ran it on my 12-core Skylake desktop (@2,9 GHz, turbo disabled). With zero work and one stage, I get ~640 Mevent/s. For the first few stages you add, you'll see a drop in performance. For example, with 3 stages, you are at ~310 Mevent/s. If you increase DSW_MAX_PORT_OUT_BUFFER and DSW_MAX_PORT_OPS_PER_BG_TASK you see improvements in efficiency on high-core-count machines. On my system, the above goes to 675 M/s for a 1-stage pipeline, and 460 M/s on a 3-stage pipeline, if I apply the following changes to dsw_evdev.h: -#define DSW_MAX_PORT_OUT_BUFFER (32) +#define DSW_MAX_PORT_OUT_BUFFER (64) -#define DSW_MAX_PORT_OPS_PER_BG_TASK (128) +#define DSW_MAX_PORT_OPS_PER_BG_TASK (512) With 500 clock cycles of dummy work, the per-event overhead is ~16 TSC clock cycles/stage and event (i.e. per scheduled event; enqueue + dequeue), if my quick-and-dirty benchmark program does the math correctly. This also includes the overhead from the benchmark program itself. Overhead with a real application will be higher.
Re: [dpdk-dev] Application used for DSW event_dev performance testing
On 2018-11-28 17:55, Mattias Rönnblom wrote: Attached is a small DSW throughput test program, that I thought might help you to find the issue. Looks like DPDK's mailman didn't like my attachment. -- /* * dswtp - A simple DSW eventdev scheduler throughput demo program. * * SPDX-License-Identifier: BSD-3-Clause * * Copyright(c) 2018 Ericsson AB * Mattias Rönnblom */ #include #include #include #include #include #include #include #include #include #include #include #define EVENT_DEV_ID (0) #define NUM_IN_FLIGHT_EVENTS (4096) #define EVENTDEV_MAX_EVENTS (NUM_IN_FLIGHT_EVENTS * 2) #define EVENTDEV_PORT_NEW_THRESHOLD (NUM_IN_FLIGHT_EVENTS) #define NUM_FLOWS (1024) #define ITER_PER_SYNC (32) #define DEQUEUE_BURST_SIZE (32) #define ENQUEUE_BURST_SIZE (32) struct worker_ctx { uint8_t event_dev_id; uint8_t event_port_id; uint32_t events_to_produce; uint16_t num_stages; uint32_t stage_work; int64_t num_events; rte_atomic64_t *events_finished; } __rte_cache_aligned; static void usage(const char *name) { printf("%s \n", name); } static int64_t sync_event_count(rte_atomic64_t *total_events_finished, uint32_t *finished_since_sync) { if (*finished_since_sync > 0) { int64_t total; total = rte_atomic64_add_return(total_events_finished, *finished_since_sync); *finished_since_sync = 0; return total; } else return rte_atomic64_read(total_events_finished); } static void cycle_consume(uint64_t work) { uint64_t deadline; if (likely(work == 0)) return; deadline = rte_get_timer_cycles() + work; while (rte_get_timer_cycles() < deadline) rte_pause(); } static int worker_start(void *arg) { struct worker_ctx *ctx = arg; uint8_t dev_id = ctx->event_dev_id; uint8_t port_id = ctx->event_port_id; uint32_t num_produced = 0; uint32_t finished_since_sync = 0; uint16_t iter_since_sync = 0; for (;;) { uint16_t dequeued; uint16_t i; uint16_t enqueued = 0; if (unlikely(num_produced < ctx->events_to_produce)) { struct rte_event ev = { .op = RTE_EVENT_OP_NEW, .queue_id = 0, .sched_type = RTE_SCHED_TYPE_ATOMIC, .flow_id = rte_rand() % NUM_FLOWS }; if (rte_event_enqueue_new_burst(dev_id, port_id, &ev, 1) == 1) num_produced++; } struct rte_event evs[DEQUEUE_BURST_SIZE]; dequeued = rte_event_dequeue_burst(dev_id, port_id, evs, DEQUEUE_BURST_SIZE, 0); for (i = 0; i < dequeued; i++) { struct rte_event *ev = &evs[i]; uint16_t this_stage = ev->queue_id; uint16_t next_stage_num = this_stage + 1; cycle_consume(ctx->stage_work); ev->op = RTE_EVENT_OP_FORWARD; if (next_stage_num == ctx->num_stages) { finished_since_sync++; ev->queue_id = 0; } else ev->queue_id = next_stage_num; } do { uint16_t left = dequeued - enqueued; uint16_t burst_size = RTE_MIN(left, ENQUEUE_BURST_SIZE); enqueued += rte_event_enqueue_burst(dev_id, port_id, evs+enqueued, burst_size); } while (unlikely(enqueued != dequeued)); iter_since_sync++; if (unlikely(iter_since_sync == ITER_PER_SYNC)) { int64_t total = sync_event_count(ctx->events_finished, &finished_since_sync); if (total >= ctx->num_events) break; iter_since_sync = 0; } } return 0; } static void setup_event_dev(uint16_t num_stages, struct worker_ctx *worker_ctxs, unsigned num_workers) { unsigned i; struct rte_event_dev_info dev_info; for (i=0; i < num_workers; i++) worker_ctxs[i].event_dev_id = EVENT_DEV_ID; rte_event_dev_in
Re: [dpdk-dev] [PATCH v10 7/9] net/virtio: add virtio send command packed queue support
On 11/2/18 10:07 AM, Jens Freimann wrote: Use packed virtqueue format when reading and writing descriptors to/from the ring. Signed-off-by: Jens Freimann --- drivers/net/virtio/virtio_ethdev.c | 90 ++ 1 file changed, 90 insertions(+) diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index ed3d5bd2c..4451dd49b 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -141,6 +141,90 @@ static const struct rte_virtio_xstats_name_off rte_virtio_txq_stat_strings[] = { struct virtio_hw_internal virtio_hw_internal[RTE_MAX_ETHPORTS]; +static struct virtio_pmd_ctrl * +virtio_pq_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl, + int *dlen, int pkt_num) +{ + struct virtqueue *vq = cvq->vq; + int head; + struct vring_desc_packed *desc = vq->ring_packed.desc_packed; + struct virtio_pmd_ctrl *result; + int wrap_counter; + int sum = 0; + int k; + + /* +* Format is enforced in qemu code: +* One TX packet for header; +* At least one TX packet per argument; +* One RX packet for ACK. +*/ + head = vq->vq_avail_idx; + wrap_counter = vq->avail_wrap_counter; + desc[head].flags = VRING_DESC_F_NEXT; + desc[head].addr = cvq->virtio_net_hdr_mem; + desc[head].len = sizeof(struct virtio_net_ctrl_hdr); + vq->vq_free_cnt--; + if (++vq->vq_avail_idx >= vq->vq_nentries) { + vq->vq_avail_idx -= vq->vq_nentries; + vq->avail_wrap_counter ^= 1; + } + + for (k = 0; k < pkt_num; k++) { + desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem + + sizeof(struct virtio_net_ctrl_hdr) + + sizeof(ctrl->status) + sizeof(uint8_t) * sum; + desc[vq->vq_avail_idx].len = dlen[k]; + desc[vq->vq_avail_idx].flags = VRING_DESC_F_NEXT; + sum += dlen[k]; + vq->vq_free_cnt--; + _set_desc_avail(&desc[vq->vq_avail_idx], + vq->avail_wrap_counter); + rte_smp_wmb(); + vq->vq_free_cnt--; + if (++vq->vq_avail_idx >= vq->vq_nentries) { + vq->vq_avail_idx -= vq->vq_nentries; + vq->avail_wrap_counter ^= 1; + } + } + + + desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem + + sizeof(struct virtio_net_ctrl_hdr); + desc[vq->vq_avail_idx].len = sizeof(ctrl->status); + desc[vq->vq_avail_idx].flags = VRING_DESC_F_WRITE; + _set_desc_avail(&desc[vq->vq_avail_idx], + vq->avail_wrap_counter); + _set_desc_avail(&desc[head], wrap_counter); + rte_smp_wmb(); + + vq->vq_free_cnt--; + if (++vq->vq_avail_idx >= vq->vq_nentries) { + vq->vq_avail_idx -= vq->vq_nentries; + vq->avail_wrap_counter ^= 1; + } + + virtqueue_notify(vq); + + /* wait for used descriptors in virtqueue */ + do { + rte_rmb(); + usleep(100); + } while (!desc_is_used(&desc[head], vq)); Maybe better to poll for !desc_is_used(&desc[vq->vq_used_cons_idx], vq)) to make it clear we're safe using used_wrap_counter? + + /* now get used descriptors */ + while(desc_is_used(&desc[vq->vq_used_cons_idx], vq)) { + vq->vq_free_cnt++; + if (++vq->vq_used_cons_idx >= vq->vq_nentries) { + vq->vq_used_cons_idx -= vq->vq_nentries; + vq->used_wrap_counter ^= 1; + } + } + + result = cvq->virtio_net_hdr_mz->addr; + return result; +} + static int virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl, int *dlen, int pkt_num) @@ -174,6 +258,11 @@ virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl, memcpy(cvq->virtio_net_hdr_mz->addr, ctrl, sizeof(struct virtio_pmd_ctrl)); + if (vtpci_packed_queue(vq->hw)) { + result = virtio_pq_send_command(cvq, ctrl, dlen, pkt_num); + goto out_unlock; + } + /* * Format is enforced in qemu code: * One TX packet for header; @@ -245,6 +334,7 @@ virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl, result = cvq->virtio_net_hdr_mz->addr; +out_unlock: rte_spinlock_unlock(&cvq->lock); return result->status; }
Re: [dpdk-dev] [PATCH] net/nfp: add multiprocess support
On 11/28/2018 11:32 AM, Alejandro Lucero wrote: > This patch introduces changes for supporting multiprocess support. > This is trivial for VFs but comes with some limitations for the PF. > > Due to restrictions when using NFP CPP interface, just one secondary > process is supported by now for the PF. Hi Alejandro, I think we needs to clarify first what is the multiprocess support required in PMD level, there are a few different implementations. cc'ed a few more folks for helping clarification. According my understanding, Only *one* DPDK application (primary or secondary) should control a device. There is no restriction in DPDK for it, this responsibility is pushed to application, application should manage it. Device initialization (probe()) done only by primary application. Because of above two items, we only need RTE_PROC_PRIMARY check in driver probe() but not in other eth_dev_ops. If we need eth_dev_ops should be protected, I believe that should be done in ethdev layer for all PMDs instead of each (some) PMD does it itself. Following should be possible: 1) - Primary starts with 5 ports and configures them - Start 5 secondary apps in a way each control a specific port data path 2) - Primary start with N ports - A secondary per port configure the port and control data path Thanks, ferruh > > Signed-off-by: Alejandro Lucero > --- > doc/guides/nics/features/nfp.ini | 1 + > doc/guides/nics/features/nfp_vf.ini| 1 + > drivers/net/nfp/nfp_net.c | 133 + > drivers/net/nfp/nfpcore/nfp_cpp_pcie_ops.c | 103 ++-- > 4 files changed, 177 insertions(+), 61 deletions(-) > > diff --git a/doc/guides/nics/features/nfp.ini > b/doc/guides/nics/features/nfp.ini > index d2899e7fb..70297b090 100644 > --- a/doc/guides/nics/features/nfp.ini > +++ b/doc/guides/nics/features/nfp.ini > @@ -24,5 +24,6 @@ Basic stats = Y > Stats per queue = Y > Linux UIO= Y > Linux VFIO = Y > +Multiprocess aware = Y > x86-64 = Y > Usage doc= Y > diff --git a/doc/guides/nics/features/nfp_vf.ini > b/doc/guides/nics/features/nfp_vf.ini > index d2899e7fb..70297b090 100644 > --- a/doc/guides/nics/features/nfp_vf.ini > +++ b/doc/guides/nics/features/nfp_vf.ini > @@ -24,5 +24,6 @@ Basic stats = Y > Stats per queue = Y > Linux UIO= Y > Linux VFIO = Y > +Multiprocess aware = Y > x86-64 = Y > Usage doc= Y > diff --git a/drivers/net/nfp/nfp_net.c b/drivers/net/nfp/nfp_net.c > index 54c6da924..ffef97d80 100644 > --- a/drivers/net/nfp/nfp_net.c > +++ b/drivers/net/nfp/nfp_net.c > @@ -766,9 +766,12 @@ nfp_net_start(struct rte_eth_dev *dev) > goto error; > } > > - if (hw->is_pf) > + if (hw->is_pf && rte_eal_process_type() == RTE_PROC_PRIMARY) > /* Configure the physical port up */ > nfp_eth_set_configured(hw->cpp, hw->pf_port_idx, 1); > + else > + nfp_eth_set_configured(dev->process_private, > +hw->pf_port_idx, 1); > > hw->ctrl = new_ctrl; > > @@ -817,9 +820,12 @@ nfp_net_stop(struct rte_eth_dev *dev) > (struct nfp_net_rxq *)dev->data->rx_queues[i]); > } > > - if (hw->is_pf) > + if (hw->is_pf && rte_eal_process_type() == RTE_PROC_PRIMARY) > /* Configure the physical port down */ > nfp_eth_set_configured(hw->cpp, hw->pf_port_idx, 0); > + else > + nfp_eth_set_configured(dev->process_private, > +hw->pf_port_idx, 0); > } > > /* Reset and stop device. The device can not be restarted. */ > @@ -2918,16 +2924,16 @@ nfp_net_init(struct rte_eth_dev *eth_dev) >hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2], >hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]); > > - /* Registering LSC interrupt handler */ > - rte_intr_callback_register(&pci_dev->intr_handle, > -nfp_net_dev_interrupt_handler, > -(void *)eth_dev); > - > - /* Telling the firmware about the LSC interrupt entry */ > - nn_cfg_writeb(hw, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX); > - > - /* Recording current stats counters values */ > - nfp_net_stats_reset(eth_dev); > + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { > + /* Registering LSC interrupt handler */ > + rte_intr_callback_register(&pci_dev->intr_handle, > +nfp_net_dev_interrupt_handler, > +(void *)eth_dev); > + /* Telling the firmware about the LSC interrupt entry */ > + nn_cfg_writeb(hw, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX); > + /* Recording current stats counters values */ > + nfp_net_stats_reset(eth_dev); >
Re: [dpdk-dev] [PATCH] net/nfp: add multiprocess support
On Wed, Nov 28, 2018 at 5:28 PM Ferruh Yigit wrote: > On 11/28/2018 11:32 AM, Alejandro Lucero wrote: > > This patch introduces changes for supporting multiprocess support. > > This is trivial for VFs but comes with some limitations for the PF. > > > > Due to restrictions when using NFP CPP interface, just one secondary > > process is supported by now for the PF. > > Hi Alejandro, > > Hi Ferruh, > I think we needs to clarify first what is the multiprocess support > required in > PMD level, there are a few different implementations. cc'ed a few more > folks for > helping clarification. > > According my understanding, > Only *one* DPDK application (primary or secondary) should control a device. > There is no restriction in DPDK for it, this responsibility is pushed to > application, application should manage it. > > Right. > Device initialization (probe()) done only by primary application. > > I think it could also be possible to have a secondary process where a device is hotplugged, so the probe is done by the secondary and shared with the primary. > Because of above two items, we only need RTE_PROC_PRIMARY check in driver > probe() but not in other eth_dev_ops. > If we need eth_dev_ops should be protected, I believe that should be done > in > ethdev layer for all PMDs instead of each (some) PMD does it itself. > > Right. But NFP requires to access the device through the NFP CPP interface for things like setting the link up or down. If the secondary starts the port, it will use the CPP interface by itself. Because the CPP interface is not shared between processes by now but each one gets its own interface, the way the link is set requires to know which process type, primary or secondary, is doing it. I think to differentiate between primary and secondaries just in the probe function is fine ... except in our case. The way this NFP PMD multiprocess support is implemented has some limitations what I will try to overcome in the near future sharing the CPP interface (if possible). This will not be a real sharing, because it will get complex easily for supporting unlimited secondary processes, but a communication between primary and secondaries where just the primary will use the CPP interface, for itself and on behalf of the secondary ones. Implementing this is not trivial, and I want to have basic multiprocess support by now for at least a primary and a secondary because: 1) Apps like dpdk-procinfo are being used for getting information about running DPDK apps. 2) I plan to submit another patch implementing a CPP bridge inside the PMD using the service layer. This will allow user space tools accessing the NFP for development and debugging. The CPP bridge will be handle by a secondary process with the only purpose of giving this service when another DPDK app, the primary process, is running. > Following should be possible: > 1) > - Primary starts with 5 ports and configures them > - Start 5 secondary apps in a way each control a specific port data path > > With the path submitted, we will only support one secondary which could control one or any port started by the primary. Of course, I want to support the full case, but as I have said, doing this is not trivial, and basic support of just one secondary is a priority now. > 2) > - Primary start with N ports > - A secondary per port configure the port and control data path > > We can support that. > > Thanks, > ferruh > > After your concerns, I think I should sent another version updating the NFP guide with this support and the current limitations. Thanks. > > > > > > Signed-off-by: Alejandro Lucero > > --- > > doc/guides/nics/features/nfp.ini | 1 + > > doc/guides/nics/features/nfp_vf.ini| 1 + > > drivers/net/nfp/nfp_net.c | 133 + > > drivers/net/nfp/nfpcore/nfp_cpp_pcie_ops.c | 103 ++-- > > 4 files changed, 177 insertions(+), 61 deletions(-) > > > > diff --git a/doc/guides/nics/features/nfp.ini > b/doc/guides/nics/features/nfp.ini > > index d2899e7fb..70297b090 100644 > > --- a/doc/guides/nics/features/nfp.ini > > +++ b/doc/guides/nics/features/nfp.ini > > @@ -24,5 +24,6 @@ Basic stats = Y > > Stats per queue = Y > > Linux UIO= Y > > Linux VFIO = Y > > +Multiprocess aware = Y > > x86-64 = Y > > Usage doc= Y > > diff --git a/doc/guides/nics/features/nfp_vf.ini > b/doc/guides/nics/features/nfp_vf.ini > > index d2899e7fb..70297b090 100644 > > --- a/doc/guides/nics/features/nfp_vf.ini > > +++ b/doc/guides/nics/features/nfp_vf.ini > > @@ -24,5 +24,6 @@ Basic stats = Y > > Stats per queue = Y > > Linux UIO= Y > > Linux VFIO = Y > > +Multiprocess aware = Y > > x86-64 = Y > > Usage doc= Y > > diff --git a/drivers/net/nfp/nfp_net.c b/drivers/net/nfp/nfp_net.c > > index 54c6da924..ffef97d80 100644 > > --- a/drivers/net/nfp/nfp_net.c > > +++ b
Re: [dpdk-dev] Question about telemetry on 18.11 release
HI, Hideyuki, >Are there any reason why accompanied document does NOT exit? >> (if you know the reason why) exit->exist; I don't know. > Is it possible to contribute such a sample application by my side? > (Just idea, I need permission from my boss though..) > You may try, DPDK is an Open Source project. > Q8. > BTW, when I read jobstats sample, I felt that > application programmers should be fully aware of the jobstats > from the first place of software design (many timers in main function) > and it is NOT "opt-in" "opt-out" feature. I believe many people think the same. Regards, Rami Rosen
Re: [dpdk-dev] [PATCH 1/2] devtools: report the incorrect section when complaining
On Wed, Nov 28, 2018 at 02:07:25PM +0100, David Marchand wrote: > On Wed, Nov 28, 2018 at 1:35 PM Neil Horman wrote: > > > On Wed, Nov 28, 2018 at 11:28:52AM +0100, David Marchand wrote: > > > It does not hurt reporting the incriminated section. > > > > > > Before: > > > ERROR: symbol rte_meter_trtcm_rfc4115_color_aware_check is added in a > > > section other than the EXPERIMENTAL section of the version map > > > > > > After: > > > ERROR: symbol rte_meter_trtcm_rfc4115_color_aware_check is added in > > > +EXPERIMENTAL section other than the EXPERIMENTAL section of the > > > version map > > > > > nit: Its a bit odd in the changelog to have an example in which the > > incorect > > section being reported matches the expected section. I.e. its confusing > > to read > > "... is added in +EXPERIMENTAL section other than the EXPERIMENTAL > > section". > > Might be better to change the language of the report below and the example > > to be > > something like: > > > > ERROR: symbol is added in the section, but is expected > > to be > > added in the EXPERIMENTAL section > > > > ACK to the notion of reporting the offending section though. Thats a good > > idea. > > > > Ok, updated for v2. > Thanks! Neil > -- > David Marchand
Re: [dpdk-dev] [RFC 0/3] tqs: add thread quiescent state library
On Wed, 28 Nov 2018 05:31:56 + Honnappa Nagarahalli wrote: > > > Mixed feelings about this one. > > > > > > Love to see RCU used for more things since it is much better than > > > reader/writer locks for many applications. But hate to see DPDK > > > reinventing every other library and not reusing code. Userspace RCU > > > https://liburcu.org/ is widely supported by distro's, more throughly > > > tested and documented, and more flexiple. > > > > > > The issue with many of these reinventions is a tradeoff of DPDK > > > growing another dependency on external library versus using common code. > > > > Agree with the dependency issues. Sometimes flexibility also causes confusion > and features that are not necessarily required for a targeted use case. I > have seen that much of the functionality that can be left to the application > is implemented as part of the library. > I think having it in DPDK will give us control over the amount of capability > this library will have and freedom over changes we would like to make to such > a library. I also view DPDK as one package where all things required for data > plane development are available. > > > > For RCU, the big issue for me is the testing and documentation of how > > > to do RCU safely. Many people get it wrong! > Hopefully, we all will do a better job collectively :) > > > Reinventing RCU is not helping anyone. DPDK needs to fix its dependency model, and just admit that it is ok to build off of more than glibc. Having used liburcu, it can be done in a small manner and really isn't that confusing. Is your real issue the LGPL license of liburcu for your skittish customers?
[dpdk-dev] [PATCH] net/bonding: fix double fetch for active_slave_count
1. when memcpy slaves the internals->active_slave_count 1 2. return internals->active_slave_count is 2 3. the slaves[1] would be a random invalid value --- drivers/net/bonding/rte_eth_bond_api.c | 8 +--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/bonding/rte_eth_bond_api.c b/drivers/net/bonding/rte_eth_bond_api.c index 21bcd50..ed7b02e 100644 --- a/drivers/net/bonding/rte_eth_bond_api.c +++ b/drivers/net/bonding/rte_eth_bond_api.c @@ -815,6 +815,7 @@ uint16_t len) { struct bond_dev_private *internals; + uint16_t active_slave_count; if (valid_bonded_port_id(bonded_port_id) != 0) return -1; @@ -824,13 +825,14 @@ internals = rte_eth_devices[bonded_port_id].data->dev_private; - if (internals->active_slave_count > len) + active_slave_count = internals->active_slave_count; + if (active_slave_count > len) return -1; memcpy(slaves, internals->active_slaves, - internals->active_slave_count * sizeof(internals->active_slaves[0])); + active_slave_count * sizeof(internals->active_slaves[0])); - return internals->active_slave_count; + return active_slave_count; } int -- 1.8.5.2.msysgit.0
[dpdk-dev] [PATCH] net/bonding: fix double fetch for active_slave_count
1. when memcpy slaves the internals->active_slave_count 1 2. return internals->active_slave_count is 2 3. the slaves[1] would be a random invalid value Signed-off-by: Haifeng Lin --- drivers/net/bonding/rte_eth_bond_api.c | 8 +--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/bonding/rte_eth_bond_api.c b/drivers/net/bonding/rte_eth_bond_api.c index 21bcd50..ed7b02e 100644 --- a/drivers/net/bonding/rte_eth_bond_api.c +++ b/drivers/net/bonding/rte_eth_bond_api.c @@ -815,6 +815,7 @@ uint16_t len) { struct bond_dev_private *internals; + uint16_t active_slave_count; if (valid_bonded_port_id(bonded_port_id) != 0) return -1; @@ -824,13 +825,14 @@ internals = rte_eth_devices[bonded_port_id].data->dev_private; - if (internals->active_slave_count > len) + active_slave_count = internals->active_slave_count; + if (active_slave_count > len) return -1; memcpy(slaves, internals->active_slaves, - internals->active_slave_count * sizeof(internals->active_slaves[0])); + active_slave_count * sizeof(internals->active_slaves[0])); - return internals->active_slave_count; + return active_slave_count; } int -- 1.8.5.2.msysgit.0
[dpdk-dev] [PATCH] ethdev: support double precision RED queue weight
RED queue weight is currently specified as a negated log of 2. Add support for RED queue weight to be specified in double precision and TM capability flags for double precision and negated log2 RED queue weight support. Signed-off-by: Nikhil Rao --- lib/librte_ethdev/rte_tm.h | 41 ++-- drivers/net/softnic/rte_eth_softnic_tm.c | 7 -- 2 files changed, 44 insertions(+), 4 deletions(-) diff --git a/lib/librte_ethdev/rte_tm.h b/lib/librte_ethdev/rte_tm.h index 646ef38..b13454d 100644 --- a/lib/librte_ethdev/rte_tm.h +++ b/lib/librte_ethdev/rte_tm.h @@ -393,6 +393,22 @@ struct rte_tm_capabilities { */ int cman_wred_byte_mode_supported; + /** Double precision RED queue weight support. When non-zero, this +* this parameter indicates that RED queue weight in double precision +* format is supported. +* @see struct rte_tm_red_params::wq_is_log2 +* @see struct rte_tm_red_params::wq_dp +*/ + int cman_wred_wq_dp_supported; + + /** Negated log2 RED queue weight support. When non-zero, this +* parameter indicates that RED queue weight in negated log2 format +* is supported. +* @see struct rte_tm_red_params::wq_is_log2 +* @see struct rte_tm_red_params::wq_log2 +*/ + int cman_wred_wq_log2_supported; + /** Head drop algorithm support. When non-zero, this parameter * indicates that there is at least one leaf node that supports the head * drop algorithm, which might not be true for all the leaf nodes. @@ -841,8 +857,27 @@ struct rte_tm_red_params { */ uint16_t maxp_inv; - /** Negated log2 of queue weight (wq), i.e. wq = 1 / (2 ^ wq_log2) */ - uint16_t wq_log2; + /** When non-zero, RED queue weight (wq) is negated log2 of queue +* weight +* +* @see struct rte_tm_capabilities::cman_wred_wq_dp_supported +* @see struct rte_tm_capabilities::cman_wred_wq_log2_supported +*/ + int wq_is_log2; + + union { + /** Double precision queue weight +* +* @see struct rte_tm_capabilities::cman_wred_wq_dp_supported +*/ + double wq_dp; + /** Negated log2 of queue weight (wq), +* i.e. wq = 1 / (2 ^ wq_log2) +* +* @see struct rte_tm_capabilities::cman_wred_wq_log2_supported +*/ + uint16_t wq_log2; + }; }; /** @@ -858,6 +893,8 @@ struct rte_tm_red_params { * * @see struct rte_tm_capabilities::cman_wred_packet_mode_supported * @see struct rte_tm_capabilities::cman_wred_byte_mode_supported + * @see struct rte_tm_capabilities::cman_wred_wq_dp_supported + * @see struct rte_tm_capabilities::cman_wred_wq_log2_supported */ struct rte_tm_wred_params { /** One set of RED parameters per packet color */ diff --git a/drivers/net/softnic/rte_eth_softnic_tm.c b/drivers/net/softnic/rte_eth_softnic_tm.c index baaafbe..e96ea8e 100644 --- a/drivers/net/softnic/rte_eth_softnic_tm.c +++ b/drivers/net/softnic/rte_eth_softnic_tm.c @@ -469,7 +469,8 @@ struct softnic_tmgr_port * .cman_wred_context_shared_n_max = 0, .cman_wred_context_shared_n_nodes_per_context_max = 0, .cman_wred_context_shared_n_contexts_per_node_max = 0, - + .cman_wred_wq_dp_supported = 0, + .cman_wred_wq_log2_supported = WRED_SUPPORTED, .mark_vlan_dei_supported = {0, 0, 0}, .mark_ip_ecn_tcp_supported = {0, 0, 0}, .mark_ip_ecn_sctp_supported = {0, 0, 0}, @@ -1243,8 +1244,10 @@ struct softnic_tmgr_port * for (color = RTE_TM_GREEN; color < RTE_TM_COLORS; color++) { uint32_t min_th = profile->red_params[color].min_th; uint32_t max_th = profile->red_params[color].max_th; + int wq_is_log2 = profile->red_params[color].wq_is_log2; - if (min_th > max_th || + if (wq_is_log2 == 0 || + min_th > max_th || max_th == 0 || min_th > UINT16_MAX || max_th > UINT16_MAX) -- 1.8.3.1
[dpdk-dev] [Bug 112] dpdk-18.05 not allowing to create bond interface with name "lan_bond"
https://bugs.dpdk.org/show_bug.cgi?id=112 Bug ID: 112 Summary: dpdk-18.05 not allowing to create bond interface with name "lan_bond" Product: DPDK Version: 18.05 Hardware: All OS: Linux Status: CONFIRMED Severity: normal Priority: Normal Component: other Assignee: dev@dpdk.org Reporter: anandashis...@gmail.com Target Milestone: --- Before upgrading to dpdk latest stable release (18.05), i was using dpdk-stable-17.05.1. With dpdk-stable-17.05.1, i was able to create bond interface with the name as "lan_bond". After upgrading to dpdk-18.05, the api rte_eth_bond_create is failing . On further testing, i found that it is accepting the bond interface name as "net_bonding0" but failing to accept the name as "lan_bond". Please help in this regard and let me know if more information is needed. Thanks in advance. -- You are receiving this mail because: You are the assignee for the bug.
Re: [dpdk-dev] [PATCH] ethdev: support double precision RED queue weight
On Thu, 29 Nov 2018 11:24:42 +0530 Nikhil Rao wrote: > RED queue weight is currently specified as a negated log of 2. > > Add support for RED queue weight to be specified in double precision > and TM capability flags for double precision and negated log2 > RED queue weight support. > > Signed-off-by: Nikhil Rao Since this is an ABI break anyway, why not just commit to the new format?
[dpdk-dev] [PATCH] bus/vdev: add warning for duplicated vdev name
If duplicated vdev name is detected, print out a warning message. Signed-off-by: Yahui Cao --- drivers/bus/vdev/vdev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bus/vdev/vdev.c b/drivers/bus/vdev/vdev.c index 9c66bdc78..ff2db7d3f 100644 --- a/drivers/bus/vdev/vdev.c +++ b/drivers/bus/vdev/vdev.c @@ -462,6 +462,8 @@ vdev_scan(void) if (find_vdev(devargs->name)) { rte_spinlock_recursive_unlock(&vdev_device_list_lock); free(dev); + VDEV_LOG(WARNING, "duplicated vdev name %s detected!", + devargs->name); continue; } -- 2.17.1
[dpdk-dev] [PATCH v3] test: reduce test duration for efd autotest
Reduced test time for efd_autotest. Key length is updated, invoke times of random function is reduced. Different value is updated for each hash key entry. Signed-off-by: Jananee Parthasarathy --- v3: reverted the simple_key to uint8_t type v2: value updated for each hash key --- test/test/test_efd.c | 18 -- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/test/test/test_efd.c b/test/test/test_efd.c index ced091aab..94b490fdc 100644 --- a/test/test/test_efd.c +++ b/test/test/test_efd.c @@ -12,7 +12,6 @@ #include "test.h" -#define EFD_TEST_KEY_LEN 8 #define TABLE_SIZE (1 << 21) #define ITERATIONS 3 @@ -331,8 +330,9 @@ static int test_average_table_utilization(void) { struct rte_efd_table *handle = NULL; uint32_t num_rules_in = TABLE_SIZE; - uint8_t simple_key[EFD_TEST_KEY_LEN]; - unsigned int i, j; + uint8_t simple_key; + unsigned int j; + efd_value_t val; unsigned int added_keys, average_keys_added = 0; printf("Evaluating table utilization and correctness, please wait\n"); @@ -340,7 +340,7 @@ static int test_average_table_utilization(void) for (j = 0; j < ITERATIONS; j++) { handle = rte_efd_create("test_efd", num_rules_in, - EFD_TEST_KEY_LEN, efd_get_all_sockets_bitmask(), + sizeof(uint8_t), efd_get_all_sockets_bitmask(), test_socket_id); if (handle == NULL) { printf("efd table creation failed\n"); @@ -353,15 +353,13 @@ static int test_average_table_utilization(void) /* Add random entries until key cannot be added */ for (added_keys = 0; added_keys < num_rules_in; added_keys++) { - for (i = 0; i < EFD_TEST_KEY_LEN; i++) - simple_key[i] = rte_rand() & 0xFF; + simple_key = rte_rand() & 0xFF; + val = mrand48() & VALUE_BITMASK; - efd_value_t val = simple_key[0]; - - if (rte_efd_update(handle, test_socket_id, simple_key, + if (rte_efd_update(handle, test_socket_id, &simple_key, val)) break; /* continue;*/ - if (rte_efd_lookup(handle, test_socket_id, simple_key) + if (rte_efd_lookup(handle, test_socket_id, &simple_key) != val) lost_keys++; else -- 2.13.6