RE: [PATCH v3 04/15] vdpa/ifc: add vdpa interrupt for blk device
Hi Maxime, Thanks for your reply and my reply is inline. -Original Message- From: Maxime Coquelin Sent: Tuesday, March 22, 2022 6:05 PM To: Pei, Andy ; dev@dpdk.org Cc: Xia, Chenbo ; Cao, Gang ; Liu, Changpeng Subject: Re: [PATCH v3 04/15] vdpa/ifc: add vdpa interrupt for blk device On 1/29/22 04:03, Andy Pei wrote: > For the blk we need to relay all the cmd of each queue. The message is not clear to me, do you mean "For the block device type, we have to relay the commands on all queues."? Andy: Yes. For BLK device, device can work with single queue, comparing to NET device, NET device use queue pair. > > Signed-off-by: Andy Pei > --- > drivers/vdpa/ifc/ifcvf_vdpa.c | 46 > --- > 1 file changed, 35 insertions(+), 11 deletions(-) > > diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c > b/drivers/vdpa/ifc/ifcvf_vdpa.c index 778e1fd..4f99bb3 100644 > --- a/drivers/vdpa/ifc/ifcvf_vdpa.c > +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c > @@ -372,24 +372,48 @@ struct rte_vdpa_dev_info { > irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; > irq_set->start = 0; > fd_ptr = (int *)&irq_set->data; > + /* The first interrupt is for the configure space change > +notification */ > fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = > rte_intr_fd_get(internal->pdev->intr_handle); > > for (i = 0; i < nr_vring; i++) > internal->intr_fd[i] = -1; > > - for (i = 0; i < nr_vring; i++) { > - rte_vhost_get_vhost_vring(internal->vid, i, &vring); > - fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd; > - if ((i & 1) == 0 && m_rx == true) { > - fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); > - if (fd < 0) { > - DRV_LOG(ERR, "can't setup eventfd: %s", > - strerror(errno)); > - return -1; > + if (internal->device_type == IFCVF_NET) { > + for (i = 0; i < nr_vring; i++) { > + rte_vhost_get_vhost_vring(internal->vid, i, &vring); > + fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd; > + if ((i & 1) == 0 && m_rx == true) { > + /* For the net we only need to relay rx queue, > + * which will change the mem of VM. 
> + */ > + fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); > + if (fd < 0) { > + DRV_LOG(ERR, "can't setup eventfd: %s", > + strerror(errno)); > + return -1; > + } > + internal->intr_fd[i] = fd; > + fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd; > + } > + } > + } else if (internal->device_type == IFCVF_BLK) { > + for (i = 0; i < nr_vring; i++) { > + rte_vhost_get_vhost_vring(internal->vid, i, &vring); > + fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd; > + if (m_rx == true) { > + /* For the blk we need to relay all the read cmd > + * of each queue > + */ > + fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); > + if (fd < 0) { > + DRV_LOG(ERR, "can't setup eventfd: %s", > + strerror(errno)); > + return -1; > + } > + internal->intr_fd[i] = fd; > + fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd; > } > - internal->intr_fd[i] = fd; > - fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd; > } > } >
RE: [PATCH v3 04/15] vdpa/ifc: add vdpa interrupt for blk device
Hi Maxime, I think it is better to change the commit log to your description, "For the block device type, we have to relay the commands on all queues.", in the next version of the patch set. -Original Message- From: Pei, Andy Sent: Wednesday, March 23, 2022 3:08 PM To: Maxime Coquelin ; dev@dpdk.org Cc: Xia, Chenbo ; Cao, Gang ; Liu, Changpeng Subject: RE: [PATCH v3 04/15] vdpa/ifc: add vdpa interrupt for blk device Hi Maxime, Thanks for your reply and my reply is inline. -Original Message- From: Maxime Coquelin Sent: Tuesday, March 22, 2022 6:05 PM To: Pei, Andy ; dev@dpdk.org Cc: Xia, Chenbo ; Cao, Gang ; Liu, Changpeng Subject: Re: [PATCH v3 04/15] vdpa/ifc: add vdpa interrupt for blk device On 1/29/22 04:03, Andy Pei wrote: > For the blk we need to relay all the cmd of each queue. The message is not clear to me, do you mean "For the block device type, we have to relay the commands on all queues."? Andy: Yes. For BLK device, device can work with single queue, comparing to NET device, NET device use queue pair. 
> > Signed-off-by: Andy Pei > --- > drivers/vdpa/ifc/ifcvf_vdpa.c | 46 > --- > 1 file changed, 35 insertions(+), 11 deletions(-) > > diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c > b/drivers/vdpa/ifc/ifcvf_vdpa.c index 778e1fd..4f99bb3 100644 > --- a/drivers/vdpa/ifc/ifcvf_vdpa.c > +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c > @@ -372,24 +372,48 @@ struct rte_vdpa_dev_info { > irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; > irq_set->start = 0; > fd_ptr = (int *)&irq_set->data; > + /* The first interrupt is for the configure space change > +notification */ > fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = > rte_intr_fd_get(internal->pdev->intr_handle); > > for (i = 0; i < nr_vring; i++) > internal->intr_fd[i] = -1; > > - for (i = 0; i < nr_vring; i++) { > - rte_vhost_get_vhost_vring(internal->vid, i, &vring); > - fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd; > - if ((i & 1) == 0 && m_rx == true) { > - fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); > - if (fd < 0) { > - DRV_LOG(ERR, "can't setup eventfd: %s", > - strerror(errno)); > - return -1; > + if (internal->device_type == IFCVF_NET) { > + for (i = 0; i < nr_vring; i++) { > + rte_vhost_get_vhost_vring(internal->vid, i, &vring); > + fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd; > + if ((i & 1) == 0 && m_rx == true) { > + /* For the net we only need to relay rx queue, > + * which will change the mem of VM. 
> + */ > + fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); > + if (fd < 0) { > + DRV_LOG(ERR, "can't setup eventfd: %s", > + strerror(errno)); > + return -1; > + } > + internal->intr_fd[i] = fd; > + fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd; > + } > + } > + } else if (internal->device_type == IFCVF_BLK) { > + for (i = 0; i < nr_vring; i++) { > + rte_vhost_get_vhost_vring(internal->vid, i, &vring); > + fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd; > + if (m_rx == true) { > + /* For the blk we need to relay all the read cmd > + * of each queue > + */ > + fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); > + if (fd < 0) { > + DRV_LOG(ERR, "can't setup eventfd: %s", > + strerror(errno)); > + return -1; > + } > + internal->intr_fd[i] = fd; > + fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd; > } > - internal->intr_fd[i] = fd; > - fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = fd; > } > } >
Re: [PATCH] net/netvsc: fix the calculation of checksums based on mbuf flag
On Wed, Mar 23, 2022 at 5:28 AM wrote: > > From: Long Li > > The netvsc should use RTE_MBUF_F_TX_L4_MASK and check the value to decide > the correct way to calculate checksums. It's better to describe what the impact for an application is. Do I understand correctly that UDP checksum offloading was broken? I guess you want this backported. Fixes: 4e9c73e96e83 ("net/netvsc: add Hyper-V network device") Cc: sta...@dpdk.org > > Signed-off-by: Long Li > --- > drivers/net/netvsc/hn_rxtx.c | 13 + > 1 file changed, 9 insertions(+), 4 deletions(-) > > diff --git a/drivers/net/netvsc/hn_rxtx.c b/drivers/net/netvsc/hn_rxtx.c > index 028f176c7e..34f40be5b8 100644 > --- a/drivers/net/netvsc/hn_rxtx.c > +++ b/drivers/net/netvsc/hn_rxtx.c > @@ -1348,8 +1348,11 @@ static void hn_encap(struct rndis_packet_msg *pkt, > *pi_data = NDIS_LSO2_INFO_MAKEIPV4(hlen, >m->tso_segsz); > } > - } else if (m->ol_flags & > - (RTE_MBUF_F_TX_TCP_CKSUM | RTE_MBUF_F_TX_UDP_CKSUM | > RTE_MBUF_F_TX_IP_CKSUM)) { > + } else if ((m->ol_flags & RTE_MBUF_F_TX_L4_MASK) == > + RTE_MBUF_F_TX_TCP_CKSUM || > + (m->ol_flags & RTE_MBUF_F_TX_L4_MASK) == > + RTE_MBUF_F_TX_UDP_CKSUM || > + (m->ol_flags & RTE_MBUF_F_TX_IP_CKSUM)) { > pi_data = hn_rndis_pktinfo_append(pkt, NDIS_TXCSUM_INFO_SIZE, > NDIS_PKTINFO_TYPE_CSUM); > *pi_data = 0; > @@ -1363,9 +1366,11 @@ static void hn_encap(struct rndis_packet_msg *pkt, > *pi_data |= NDIS_TXCSUM_INFO_IPCS; > } > > - if (m->ol_flags & RTE_MBUF_F_TX_TCP_CKSUM) > + if ((m->ol_flags & RTE_MBUF_F_TX_L4_MASK) == > + RTE_MBUF_F_TX_TCP_CKSUM) > *pi_data |= NDIS_TXCSUM_INFO_MKTCPCS(hlen); > - else if (m->ol_flags & RTE_MBUF_F_TX_UDP_CKSUM) > + else if ((m->ol_flags & RTE_MBUF_F_TX_L4_MASK) == > + RTE_MBUF_F_TX_UDP_CKSUM) > *pi_data |= NDIS_TXCSUM_INFO_MKUDPCS(hlen); > } > > -- > 2.32.0 > -- David Marchand
Re: [PATCH] net/netvsc: report correct stats values
On Wed, Mar 23, 2022 at 5:28 AM wrote: > > From: Long Li > > The netvsc should add to the values from the VF and report the sum. We need a Fixes: tag. > > Signed-off-by: Long Li -- David Marchand
RE: [PATCH v3 05/15] vdpa/ifc: add blk dev sw live migration
Hi Maxime, Thanks for your reply and reply is inline. -Original Message- From: Maxime Coquelin Sent: Tuesday, March 22, 2022 7:10 PM To: Pei, Andy ; dev@dpdk.org Cc: Xia, Chenbo ; Cao, Gang ; Liu, Changpeng Subject: Re: [PATCH v3 05/15] vdpa/ifc: add blk dev sw live migration Hi Andy, "vdpa/ifc: add block device SW live-migration" On 1/29/22 04:03, Andy Pei wrote: > Enable virtio blk sw live migration relay callfd and log the dirty page. Please try to make the above sentence simpler. Also, it seems that below patch changes behaviour for net devices, so the commit message should explain that. Andy: Sure, I think it is better to send out a new patch set. Using a simper commit log and re-work to make sure the code do not change the behavior of net device. > In this version we ignore the write cmd and still mark it dirty. > > Signed-off-by: Andy Pei > --- > drivers/vdpa/ifc/base/ifcvf.c | 4 +- > drivers/vdpa/ifc/base/ifcvf.h | 6 ++ > drivers/vdpa/ifc/ifcvf_vdpa.c | 128 > +++--- > 3 files changed, 116 insertions(+), 22 deletions(-) > > diff --git a/drivers/vdpa/ifc/base/ifcvf.c > b/drivers/vdpa/ifc/base/ifcvf.c index 721cb1d..3a69e53 100644 > --- a/drivers/vdpa/ifc/base/ifcvf.c > +++ b/drivers/vdpa/ifc/base/ifcvf.c > @@ -189,7 +189,7 @@ > IFCVF_WRITE_REG32(val >> 32, hi); > } > > -STATIC int > +int > ifcvf_hw_enable(struct ifcvf_hw *hw) > { > struct ifcvf_pci_common_cfg *cfg; > @@ -238,7 +238,7 @@ > return 0; > } > > -STATIC void > +void > ifcvf_hw_disable(struct ifcvf_hw *hw) > { > u32 i; > diff --git a/drivers/vdpa/ifc/base/ifcvf.h > b/drivers/vdpa/ifc/base/ifcvf.h index 769c603..6dd7925 100644 > --- a/drivers/vdpa/ifc/base/ifcvf.h > +++ b/drivers/vdpa/ifc/base/ifcvf.h > @@ -179,4 +179,10 @@ struct ifcvf_hw { > u64 > ifcvf_get_queue_notify_off(struct ifcvf_hw *hw, int qid); > > +int > +ifcvf_hw_enable(struct ifcvf_hw *hw); > + > +void > +ifcvf_hw_disable(struct ifcvf_hw *hw); > + > #endif /* _IFCVF_H_ */ > diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c > 
b/drivers/vdpa/ifc/ifcvf_vdpa.c index 4f99bb3..a930825 100644 > --- a/drivers/vdpa/ifc/ifcvf_vdpa.c > +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c > @@ -332,10 +332,67 @@ struct rte_vdpa_dev_info { > > rte_vhost_get_negotiated_features(vid, &features); > if (RTE_VHOST_NEED_LOG(features)) { > - ifcvf_disable_logging(hw); > - rte_vhost_get_log_base(internal->vid, &log_base, &log_size); > - rte_vfio_container_dma_unmap(internal->vfio_container_fd, > - log_base, IFCVF_LOG_BASE, log_size); > + if (internal->device_type == IFCVF_NET) { > + ifcvf_disable_logging(hw); > + rte_vhost_get_log_base(internal->vid, &log_base, > + &log_size); > + rte_vfio_container_dma_unmap( > + internal->vfio_container_fd, log_base, > + IFCVF_LOG_BASE, log_size); > + } > + /* IFCVF marks dirty memory pages for only packet buffer, > + * SW helps to mark the used ring as dirty after device stops. > + */ > + for (i = 0; i < hw->nr_vring; i++) { > + len = IFCVF_USED_RING_LEN(hw->vring[i].size); > + rte_vhost_log_used_vring(vid, i, 0, len); > + } > + } > +} > + > +static void > +vdpa_ifcvf_blk_pause(struct ifcvf_internal *internal) { > + struct ifcvf_hw *hw = &internal->hw; > + struct rte_vhost_vring vq; > + int i, vid; > + uint64_t features = 0; > + uint64_t log_base = 0, log_size = 0; > + uint64_t len; > + > + vid = internal->vid; > + > + if (internal->device_type == IFCVF_BLK) { > + for (i = 0; i < hw->nr_vring; i++) { > + rte_vhost_get_vhost_vring(internal->vid, i, &vq); > + while (vq.avail->idx != vq.used->idx) { > + ifcvf_notify_queue(hw, i); > + usleep(10); > + } > + hw->vring[i].last_avail_idx = vq.avail->idx; > + hw->vring[i].last_used_idx = vq.used->idx; > + } > + } > + > + ifcvf_hw_disable(hw); > + > + for (i = 0; i < hw->nr_vring; i++) > + rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx, > + hw->vring[i].last_used_idx); > + > + if (internal->sw_lm) > + return; > + > + rte_vhost_get_negotiated_features(vid, &features); > + if (RTE_VHOST_NEED_LOG(features)) { > + if 
(internal->device_type == IFCVF_NET) { > + ifcvf_disable_logging(hw); > + rte_vhost_get_log_base(internal->vid, &log_base, > + &log_size); > + rte_vfio_container_dma_un
Re: [PATCH 2/6] net/hns3: fix inconsistent enabled RSS behavior
23/03/2022 04:05, lihuisong (C): > 在 2022/3/23 1:13, Thomas Monjalon 写道: > > 21/03/2022 08:14, lihuisong (C): > >> 2022/3/10 16:08, lihuisong (C): > >>> 2022/3/9 17:55, Ori Kam: > From: lihuisong (C) > > 2022/3/3 10:47, lihuisong (C): > >> 2022/3/2 22:07, Ori Kam: > >>> From: lihuisong (C) > 2022/3/1 0:42, Ferruh Yigit: > > On 2/28/2022 3:21 AM, Min Hu (Connor) wrote: > >> From: Huisong Li > >> > >> RSS will not be enabled if the RTE_ETH_MQ_RX_RSS_FLAG isn't be > >> set in > >> dev_configure phase. However, if this flag isn't set, RSS can be > >> enabled > >> through the ethdev ops and rte_flow API. This behavior is > >> contrary to > >> each > >> other. > >> > >> Fixes: c37ca66f2b27 ("net/hns3: support RSS") > >> Cc: sta...@dpdk.org > >> > >> Signed-off-by: Huisong Li > > Hi Huisong, Connor, > > > > Let's get a little more feedback for this patch, cc'ed more people. > > > > To enable RSS, multi queue mode should be set to > > 'RTE_ETH_MQ_RX_RSS_FLAG'. > > > > But I wonder if it is required to configure RSS via flow API, > I do not know the original purpose of adding the RSS > configuration in > flow API. > > >>> The purpose is simple, this allow to create RSS per rule and not a > >>> global one. > >>> For example create RSS that sends TCP to some queues while othe RSS > >>> will send > >>> UDP traffic to different queues. > >> I'm a little confused now. The "per rule" also seems to be a global > >> configuration. > >> Example: > >>- start PMD with 0,1,2,3 > >>- create TCP packets to 2,3 queues. At this moment, only 2,3 queues > >> can be received for other types of packets. > >> Because this rule is implemented by modifying the entry of the > >> redirection table which is global for this device. > > Hi, Ori and Stephen. > > Can you help me clear up the confusion above? If some NICs behave like > > this, what should we do about it? > I'm not sure I understand the issue, maybe it is releated to some > HW/PMD limitation. 
> In your example non TCP traffic will be routed to one of the 4 queues > (0,1,2,3), > While TCP traffic will only be routed to queues 2,3. > > Now I can add new rule that matches on UDP packet and RSS to queue 0 > and 3 in this case: > TCP packets will be routed to queues 0,3. > UDP packets will be routed to queues 2,3. > All the rest of the traffic will be routed to queues 0,1,2,3 > > And just to be clear if now I add a rule to match all packets in > higher priority, > with RSS to queues 1,2. Then all traffic will be routed to queues 1,2. > > At least this is what is expected, from API point of view. > > Best, > Ori > >>> Thank you for your answer. I understand it. > >>> hns3 PMD cannot implement the above functions due to hardware limitation. > >>> we may need add a check that specified RSS queues cannot be supported > >>> when specified packets types. > >>> And only the packet type is specified, which meets the requirements of > >>> rte_flow API. > >>> The check for the RTE_ETH_MQ_RX_RSS_FLAG flag in rte_flow is not correct. > >>> Thanks, Ori and Stephen😁 > >>> > >>> But, I think, it is necessary for the '.rss_hash_update' and > >>> '.reta_update' APIs > >>> in eth_dev_ops to verify this flag. What do you think? @Thomas, > >>> @Ferruh, @Ori and @Stephen. > >> What's your take on it? I am looking forward to your reply. Thanks! > > > > I am not sure why you want to check this flag. > > I want to make sure that the behavior that PMD configured RSS is > consistent across different interfaces. The RTE_ETH_MQ_RX_RSS_FLAG > flag is a switch to enable RSS hash. If the switch isn't open, some > PMD do not configure RSS function. I think the consistency is necessary. > If not set RSS muti-queue mode, it is unnecessary to configure RSS. > > > I can imagine we configure the hash and the table before enabling RSS > > with the RTE_ETH_MQ_RX_RSS_FLAG flag. > > The flag is derived from dev_configure() which also configures > hash and key. 
I don't think it makes sense to configure hash and > reta before calling dev_configure. Because they'll be updated. > This is similar to configuring mtu. OK I see your point. So you would like to return an error in RSS functions if the flag RTE_ETH_MQ_RX_RSS_FLAG is not set? Should it be checked in ethdev library or PMDs?
[PATCH] devtools: document ABI suppression rules
Suppression rules are being added during the life of an ABI and cleaned when bumping the major version. Sort and document those rules to avoid pruning rules that should be kept. Signed-off-by: David Marchand --- devtools/libabigail.abignore | 20 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/devtools/libabigail.abignore b/devtools/libabigail.abignore index c618f20032..587cc5c381 100644 --- a/devtools/libabigail.abignore +++ b/devtools/libabigail.abignore @@ -1,3 +1,7 @@ +;;; +; Core suppression rules: DO NOT TOUCH; +;;; + [suppress_function] symbol_version = EXPERIMENTAL [suppress_variable] @@ -16,15 +20,23 @@ [suppress_file] soname_regexp = ^librte_.*mlx.*glue\. -; Ignore fields inserted in place of reserved_opts of rte_security_ipsec_sa_options -[suppress_type] -name = rte_security_ipsec_sa_options -has_data_member_inserted_between = {offset_of(reserved_opts), end} + +; Experimental APIs exceptions ; + ; Ignore changes to rte_crypto_asym_op, asymmetric crypto API is experimental [suppress_type] name = rte_crypto_asym_op + +; Temporary exceptions till next major ABI version ; + + +; Ignore fields inserted in place of reserved_opts of rte_security_ipsec_sa_options +[suppress_type] +name = rte_security_ipsec_sa_options +has_data_member_inserted_between = {offset_of(reserved_opts), end} + ; Ignore section attribute fixes in experimental regexdev library [suppress_file] soname_regexp = ^librte_regexdev\. -- 2.23.0
[PATCH] eal: factorize lcore main loop
All OS implementations provide the same main loop. Introduce helpers (shared for Linux and FreeBSD) to handle synchronisation between main and threads and factorize the rest as common code. Thread id are now logged as string in a common format across OS. Signed-off-by: David Marchand --- I had this patch in store for a long time. I don't particularly care about it, it's not fixing anything. But it seems a good cleanup/consolidation, so I rebased it and I am sending it to get feedback. --- lib/eal/common/eal_common_launch.c | 36 +++- lib/eal/common/eal_common_thread.c | 72 +++ lib/eal/common/eal_thread.h| 27 ++ lib/eal/freebsd/eal.c | 7 +- lib/eal/freebsd/eal_thread.c | 138 lib/eal/linux/eal.c| 4 +- lib/eal/linux/eal_thread.c | 139 + lib/eal/unix/eal_unix_thread.c | 63 + lib/eal/unix/meson.build | 5 +- lib/eal/windows/eal_thread.c | 137 +++- 10 files changed, 236 insertions(+), 392 deletions(-) create mode 100644 lib/eal/unix/eal_unix_thread.c diff --git a/lib/eal/common/eal_common_launch.c b/lib/eal/common/eal_common_launch.c index 9f393b9bda..5770803172 100644 --- a/lib/eal/common/eal_common_launch.c +++ b/lib/eal/common/eal_common_launch.c @@ -5,11 +5,13 @@ #include #include +#include #include #include #include #include "eal_private.h" +#include "eal_thread.h" /* * Wait until a lcore finished its job. @@ -18,12 +20,44 @@ int rte_eal_wait_lcore(unsigned worker_id) { while (__atomic_load_n(&lcore_config[worker_id].state, - __ATOMIC_ACQUIRE) != WAIT) + __ATOMIC_ACQUIRE) != WAIT) rte_pause(); return lcore_config[worker_id].ret; } +/* + * Send a message to a worker lcore identified by worker_id to call a + * function f with argument arg. Once the execution is done, the + * remote lcore switches to WAIT state. + */ +int +rte_eal_remote_launch(int (*f)(void *), void *arg, unsigned int worker_id) +{ + int rc = -EBUSY; + + /* Check if the worker is in 'WAIT' state. Use acquire order +* since 'state' variable is used as the guard variable. 
+*/ + if (__atomic_load_n(&lcore_config[worker_id].state, + __ATOMIC_ACQUIRE) != WAIT) + goto finish; + + lcore_config[worker_id].arg = arg; + /* Ensure that all the memory operations are completed +* before the worker thread starts running the function. +* Use worker thread function as the guard variable. +*/ + __atomic_store_n(&lcore_config[worker_id].f, f, __ATOMIC_RELEASE); + + eal_thread_wake_worker(worker_id); + rc = 0; + +finish: + rte_eal_trace_thread_remote_launch(f, arg, worker_id, rc); + return rc; +} + /* * Check that every WORKER lcores are in WAIT state, then call * rte_eal_remote_launch() for all of them. If call_main is true diff --git a/lib/eal/common/eal_common_thread.c b/lib/eal/common/eal_common_thread.c index 684bea166c..256de91abc 100644 --- a/lib/eal/common/eal_common_thread.c +++ b/lib/eal/common/eal_common_thread.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -163,6 +164,77 @@ __rte_thread_uninit(void) RTE_PER_LCORE(_lcore_id) = LCORE_ID_ANY; } +/* main loop of threads */ +__rte_noreturn void * +eal_thread_loop(__rte_unused void *arg) +{ + char cpuset[RTE_CPU_AFFINITY_STR_LEN]; + pthread_t thread_id = pthread_self(); + unsigned int lcore_id; + int ret; + + /* retrieve our lcore_id from the configuration structure */ + RTE_LCORE_FOREACH_WORKER(lcore_id) { + if (thread_id == lcore_config[lcore_id].thread_id) + break; + } + if (lcore_id == RTE_MAX_LCORE) + rte_panic("cannot retrieve lcore id\n"); + + __rte_thread_init(lcore_id, &lcore_config[lcore_id].cpuset); + + ret = eal_thread_dump_current_affinity(cpuset, sizeof(cpuset)); + RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%zx;cpuset=[%s%s])\n", + lcore_id, (uintptr_t)thread_id, cpuset, + ret == 0 ? "" : "..."); + + rte_eal_trace_thread_lcore_ready(lcore_id, cpuset); + + /* read on our pipe to get commands */ + while (1) { + lcore_function_t *f; + void *fct_arg; + + eal_thread_wait_command(); + + /* Set the state to 'RUNNING'. 
Use release order +* since 'state' variable is used as the guard variable. +*/ + __atomic_store_n(&lcore_config[lcore_id].state, RUNNING, + __ATOMIC_RELEASE); + + eal_thread_ack_command(); + + /* Load 'f' with acquire order to ensure that +* the
RE: [PATCH v3 06/15] example/vdpa:add vdpa blk support in example
Hi Maxime, It seems there are a lot of problems with the example. I will rework the example according to your comments. Thanks for your comments. -Original Message- From: Maxime Coquelin Sent: Tuesday, March 22, 2022 7:30 PM To: Pei, Andy ; dev@dpdk.org Cc: Xia, Chenbo ; Cao, Gang ; Liu, Changpeng Subject: Re: [PATCH v3 06/15] example/vdpa:add vdpa blk support in example On 1/29/22 04:03, Andy Pei wrote: > Add virtio blk device support to vdpa example. > > Signed-off-by: Andy Pei > --- > examples/vdpa/Makefile | 2 +- > examples/vdpa/main.c | 8 ++ > examples/vdpa/meson.build| 1 + > examples/vdpa/vdpa_blk_compact.c | 150 +++ > examples/vdpa/vdpa_blk_compact.h | 117 > examples/vdpa/vhost_user.h | 189 > +++ > 6 files changed, 466 insertions(+), 1 deletion(-) > create mode 100644 examples/vdpa/vdpa_blk_compact.c > create mode 100644 examples/vdpa/vdpa_blk_compact.h > create mode 100644 examples/vdpa/vhost_user.h > > diff --git a/examples/vdpa/Makefile b/examples/vdpa/Makefile index > d974db4..9d0479b 100644 > --- a/examples/vdpa/Makefile > +++ b/examples/vdpa/Makefile > @@ -5,7 +5,7 @@ > APP = vdpa > > # all source are stored in SRCS-y > -SRCS-y := main.c > +SRCS-y := main.c vdpa_blk_compact.c > CFLAGS += -DALLOW_EXPERIMENTAL_API > > PKGCONF ?= pkg-config > diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c index > 5ab0765..924ad7b 100644 > --- a/examples/vdpa/main.c > +++ b/examples/vdpa/main.c > @@ -20,6 +20,7 @@ > #include > #include > #include > +#include "vdpa_blk_compact.h" > > #define MAX_PATH_LEN 128 > #define MAX_VDPA_SAMPLE_PORTS 1024 > @@ -156,6 +157,7 @@ struct vdpa_port { > static const struct rte_vhost_device_ops vdpa_sample_devops = { > .new_device = new_device, > .destroy_device = destroy_device, > + .new_connection = rte_vhost_blk_session_install_rte_compat_hooks, > }; > > static int > @@ -192,6 +194,12 @@ struct vdpa_port { > "attach vdpa device failed: %s\n", > socket_path); > > + if (vdpa_blk_device_set_features_and_protocol(socket_path, 
vport->dev) > + < 0) > + rte_exit(EXIT_FAILURE, > + "set vhost blk driver features and protocol features > failed: %s\n", > + socket_path); > + That does not look right, blk devices specitic functions shuold be called only for block devices. > if (rte_vhost_driver_start(socket_path) < 0) > rte_exit(EXIT_FAILURE, > "start vhost driver failed: %s\n", diff --git > a/examples/vdpa/meson.build b/examples/vdpa/meson.build index > bd08605..f0d111c 100644 > --- a/examples/vdpa/meson.build > +++ b/examples/vdpa/meson.build > @@ -15,4 +15,5 @@ deps += 'vhost' > allow_experimental_apis = true > sources = files( > 'main.c', > + 'vdpa_blk_compact.c', > ) > diff --git a/examples/vdpa/vdpa_blk_compact.c > b/examples/vdpa/vdpa_blk_compact.c > new file mode 100644 > index 000..0c4d3ee > --- /dev/null > +++ b/examples/vdpa/vdpa_blk_compact.c > @@ -0,0 +1,150 @@ > +/*INTEL CONFIDENTIAL > + * > + *Copyright (c) Intel Corporation. > + *All rights reserved. > + * > + *The source code contained or described herein and all documents related > + *to the source code ("Material") are owned by Intel Corporation or its > + *suppliers or licensors. Title to the Material remains with Intel > + *Corporation or its suppliers and licensors. The Material contains > trade > + *secrets and proprietary and confidential information of Intel or its > + *suppliers and licensors. The Material is protected by worldwide > + *copyright and trade secret laws and treaty provisions. No part of the > + *Material may be used, copied, reproduced, modified, published, > uploaded, > + *posted, transmitted, distributed, or disclosed in any way without > Intel's > + *prior express written permission. > + * > + *No license under any patent, copyright, trade secret or other > + *intellectual property right is granted to or conferred upon you by > + *disclosure or delivery of the Materials, either expressly, by > + *implication, inducement, estoppel or otherwise. 
Any license under such > + *intellectual property rights must be express and approved by Intel in > + *writing. > + */ > + > +/* @file > + * > + * Block device specific vhost lib > + */ > + > +#include > + > +#include > +#include That's wrong, the application is not supposed to include the driver APIs. > +#include > +#include "vdpa_blk_compact.h" > +#include "vhost_user.h" > + > +#define VHOST_USER_GET_CONFIG24 > +#define VHOST_USER_SET_CONFIG25 > + > +#ifndef VHOST_USER_PROTOCOL_F_CONFIG > +#define VHOST_USER_PROTOCOL_F_CONFIG 9 > +#endi
RE: [PATCH] eal: factorize lcore main loop
> From: David Marchand [mailto:david.march...@redhat.com] > Sent: Wednesday, 23 March 2022 10.30 > > All OS implementations provide the same main loop. > Introduce helpers (shared for Linux and FreeBSD) to handle > synchronisation > between main and threads and factorize the rest as common code. > Thread id are now logged as string in a common format across OS. > > Signed-off-by: David Marchand > --- > I had this patch in store for a long time. > I don't particularly care about it, it's not fixing anything. > But it seems a good cleanup/consolidation, so I rebased it and I am > sending it to get feedback. > LGTM. I'm always in favor of cleaning up! :-) Thank you, David. Acked-By: Morten Brørup
RE: DPDK seqlock
> From: Mattias Rönnblom [mailto:mattias.ronnb...@ericsson.com] > Sent: Tuesday, 22 March 2022 17.10 > > Hi. > > Would it make sense to have a seqlock implementation in DPDK? Certainly! > > I think so, since it's a very useful synchronization primitive in data > plane applications. Yes, and having it in DPDK saves application developers from writing their own (with the risks coming with that). > > Regards, > Mattias
RE: [PATCH v5] ip_frag: add IPv4 options fragment and test data
> According to RFC 791, the options may or may not appear in datagrams. > They must be implemented by all IP modules (hosts and gateways). > What is optional is their transmission in any particular datagram, > not their implementation. So we have to deal with them during the > fragmenting process. Add some test data for fragmenting the IPv4 header > options field. > > Signed-off-by: Huichao Cai > --- Acked-by: Konstantin Ananyev > 1.8.3.1
Re: 20.11.5 patches review and test
Hello Luca, The testing with dpdk 20.11.5-rc1 from Red Hat looks good. We tested below 17 scenarios and all got PASS on RHEL8: (1)Guest with device assignment(PF) throughput testing(1G hugepage size): PASS (2)Guest with device assignment(PF) throughput testing(2M hugepage size) : PASS (3)Guest with device assignment(VF) throughput testing: PASS (4)PVP (host dpdk testpmd as vswitch) 1Q: throughput testing: PASS (5)PVP vhost-user 2Q throughput testing: PASS (6)PVP vhost-user 1Q - cross numa node throughput testing: PASS (7)Guest with vhost-user 2 queues throughput testing: PASS (8)vhost-user reconnect with dpdk-client, qemu-server: qemu reconnect: PASS (9)vhost-user reconnect with dpdk-client, qemu-server: ovs reconnect: PASS (10)PVP 1Q live migration testing: PASS (11)PVP 1Q cross numa node live migration testing: PASS (12)Guest with ovs+dpdk+vhost-user 1Q live migration testing: PASS (13)Guest with ovs+dpdk+vhost-user 1Q live migration testing (2M): PASS (14)Guest with ovs+dpdk+vhost-user 2Q live migration testing: PASS (15)Guest with ovs+dpdk+vhost-user 4Q live migration testing: PASS (16)Host PF + DPDK testing: PASS (17)Host VF + DPDK testing: PASS Versions: kernel 4.18 qemu 6.2 dpdk: git://dpdk.org/dpdk-stable branch: 20.11 # git log -1 commit 576842a59ab35979dc102535f59061fa3d6ea16b Author: Luca Boccassi Date: Fri Mar 18 15:01:38 2022 + version: 20.11.5-rc1 Signed-off-by: Luca Boccassi NICs: X540-AT2 NIC(ixgbe, 10G) Best regards, Pei On Fri, Mar 18, 2022 at 11:21 PM wrote: > Hi all, > > Here is a list of patches targeted for stable release 20.11.5. > > The planned date for the final release is the 4th of April. > > Please help with testing and validation of your use cases and report > any issues/results with reply-all to this mail. For the final release > the fixes and reported validations will be added to the release notes. 
> > A release candidate tarball can be found at: > > https://dpdk.org/browse/dpdk-stable/tag/?id=v20.11.5-rc1 > > These patches are located at branch 20.11 of dpdk-stable repo: > https://dpdk.org/browse/dpdk-stable/ > > Thanks. > > Luca Boccassi > > --- > Adham Masarwah (1): > app/testpmd: fix show RSS RETA on Windows > > Ajit Khaparde (4): > net/bnxt: fix PAM4 mask setting > net/bnxt: check VF representor pointer before access > net/bnxt: fix VF resource allocation strategy > net/bnxt: fix ring calculation for representors > > Alexander Kozyrev (2): > net/mlx5: fix maximum packet headers size for TSO > net/mlx5: fix committed bucket size > > Ali Alnubani (1): > doc: fix typos and punctuation in flow API guide > > Anatoly Burakov (1): > net/qede: fix redundant condition in debug code > > Andrzej Ostruszka (1): > ring: optimize corner case for enqueue/dequeue > > Andy Pei (1): > vdpa/ifc: fix log info mismatch > > Arek Kusztal (1): > cryptodev: fix RSA key type name > > Bin Zheng (1): > net/ixgbe: add vector Rx parameter check > > Bing Zhao (2): > net/mlx5: fix matcher priority with ICMP or ICMPv6 > net/mlx5: remove unused reference counter > > Brian Dooley (11): > eal: add missing C++ guards > telemetry: add missing C++ guards > ethdev: add missing C++ guards > metrics: add missing C++ guards > acl: add missing C++ guards > compressdev: add missing C++ guards > eventdev: add missing C++ guards > kni: add missing C++ guards > vhost: add missing C++ guards > examples/l2fwd-crypto: fix port mask overflow > crypto/virtio: fix out-of-bounds access > > Bruce Richardson (19): > doc: remove dependency on findutils on FreeBSD > dma/idxd: fix paths to driver sysfs directory > build: fix warnings when running external commands > build: fix warning about using -Wextra flag > build: remove deprecated Meson functions > eal: fix C++ include > eventdev: fix C++ include > graph: fix C++ include > ipsec: fix C++ include > table: fix C++ include > vhost: fix C++ include > ethdev: fix 
cast for C++ compatibility > dma/idxd: configure maximum batch size to high value > distributor: fix potential overflow > eal/freebsd: add missing C++ include guards > compressdev: fix missing space in log macro > cryptodev: fix clang C++ include > doc: replace characters for (R) symbol in Linux guide > doc: fix missing note on UIO module in Linux guide > > Chandubabu Namburu (1): > net/axgbe: use PCI root complex device to distinguish device > > Chenbo Xia (1): > vhost: fix queue number check when setting inflight FD > > Chengchang Tang (1): > net/bonding: fix offloading configuration > > Chengwen Feng (1): > net/hns3: delete duplicated RSS type > > Chuanshe Zhang (1): > examples/flow_classify: fix failure message > > Ciara Loftus (2): > net/af_xdp: fix build with -Wunused-function >
[Bug 975] Power DOWN of copper interface not working in e1000 driver
https://bugs.dpdk.org/show_bug.cgi?id=975 Bug ID: 975 Summary: Power DOWN of copper interface not working in e1000 driver Product: DPDK Version: 22.03 Hardware: All OS: All Status: UNCONFIRMED Severity: normal Priority: Normal Component: ethdev Assignee: dev@dpdk.org Reporter: tobias.karls...@netscout.com Target Milestone: --- A bug crept in via commit 4414059f151f39f7e075b887decfc9a10f11 In function e1000_power_down_phy_copper_base() in file drivers/net/e1000/base/e1000_base.c The following change is needed or the interface will not be brought down properly: - if (phy->ops.check_reset_block(hw)) + if (phy->ops.check_reset_block(hw) == E1000_SUCCESS) e1000_power_down_phy_copper(hw); Please also update the comment above these code lines. -- You are receiving this mail because: You are the assignee for the bug.
Re: [PATCH] net/netvsc: fix the calculation of checksums based on mbuf flag
On Tue, 22 Mar 2022 21:28:07 -0700 lon...@linuxonhyperv.com wrote: > From: Long Li > > The netvsc should use RTE_MBUF_F_TX_L4_MASK and check the value to decide > the correct way to calculate checksums. > > Signed-off-by: Long Li Acked-by: Stephen Hemminger
Re: [PATCH] net/netvsc: report correct stats values
On Tue, 22 Mar 2022 21:28:25 -0700 lon...@linuxonhyperv.com wrote: > From: Long Li > > The netvsc should add to the values from the VF and report the sum. > > Signed-off-by: Long Li > --- Acked-by: Stephen Hemminger
Re: 20.11.5 patches review and test
On Wed, 2022-03-23 at 21:33 +0800, Pei Zhang wrote: > Hello Luca, > > The testing with dpdk 20.11.5-rc1 from Red Hat looks good. We tested > below 17 scenarios and all got PASS on RHEL8: > > (1)Guest with device assignment(PF) throughput testing(1G hugepage > size): PASS > (2)Guest with device assignment(PF) throughput testing(2M hugepage > size) : PASS > (3)Guest with device assignment(VF) throughput testing: PASS > (4)PVP (host dpdk testpmd as vswitch) 1Q: throughput testing: PASS > (5)PVP vhost-user 2Q throughput testing: PASS > (6)PVP vhost-user 1Q - cross numa node throughput testing: PASS > (7)Guest with vhost-user 2 queues throughput testing: PASS > (8)vhost-user reconnect with dpdk-client, qemu-server: qemu > reconnect: PASS > (9)vhost-user reconnect with dpdk-client, qemu-server: ovs reconnect: > PASS > (10)PVP 1Q live migration testing: PASS > (11)PVP 1Q cross numa node live migration testing: PASS > (12)Guest with ovs+dpdk+vhost-user 1Q live migration testing: PASS > (13)Guest with ovs+dpdk+vhost-user 1Q live migration testing (2M): > PASS > (14)Guest with ovs+dpdk+vhost-user 2Q live migration testing: PASS > (15)Guest with ovs+dpdk+vhost-user 4Q live migration testing: PASS > (16)Host PF + DPDK testing: PASS > (17)Host VF + DPDK testing: PASS > > Versions: > kernel 4.18 > qemu 6.2 > dpdk: git://dpdk.org/dpdk-stable branch: 20.11 > # git log -1 > commit 576842a59ab35979dc102535f59061fa3d6ea16b > Author: Luca Boccassi > Date: Fri Mar 18 15:01:38 2022 + > version: 20.11.5-rc1 > Signed-off-by: Luca Boccassi > > NICs: X540-AT2 NIC(ixgbe, 10G) > > Best regards, > > Pei Thank you! -- Kind regards, Luca Boccassi
[PATCH v1] dmadev: add telemetry support
Telemetry commands are now registered through the dmadev library for the gathering of DSA stats. The corresponding callback functions for listing dmadevs and providing info and stats for a specific dmadev are implemented in the dmadev library. An example usage can be seen below: Connecting to /var/run/dpdk/rte/dpdk_telemetry.v2 {"version": "DPDK 22.03.0-rc2", "pid": 2956551, "max_output_len": 16384} Connected to application: "dpdk-dma" --> / {"/": ["/", "/dmadev/info", "/dmadev/list", "/dmadev/stats", ...]} --> /dmadev/list {"/dmadev/list": [0, 1]} --> /dmadev/info,0 {"/dmadev/info": {"name": ":00:01.0", "nb_vchans": 1, "numa_node": 0}} --> /dmadev/stats,0,0 {"/dmadev/stats": {"submitted": 0, "completed": 0, "errors": 0}} Signed-off-by: Sean Morrissey --- doc/guides/prog_guide/dmadev.rst | 24 ++ doc/guides/rel_notes/release_22_07.rst | 4 + lib/dmadev/meson.build | 2 + lib/dmadev/rte_dmadev.c| 105 + 4 files changed, 135 insertions(+) diff --git a/doc/guides/prog_guide/dmadev.rst b/doc/guides/prog_guide/dmadev.rst index 77863f8028..1ff66dfe2a 100644 --- a/doc/guides/prog_guide/dmadev.rst +++ b/doc/guides/prog_guide/dmadev.rst @@ -118,3 +118,27 @@ i.e. ``rte_dma_stats_get()``. The statistics returned for each device instance a * ``submitted``: The number of operations submitted to the device. * ``completed``: The number of operations which have completed (successful and failed). * ``errors``: The number of operations that completed with error. + +The dmadev library has support for displaying DMA device information +through the Telemetry interface. Telemetry commands that can be used +are shown below. + +#. Get the list of available DMA devices by ID:: + + --> /dmadev/list + {"/dmadev/list": [0, 1]} + +#. Get general information from a DMA device:: + + --> /dmadev/info,0 + {"/dmadev/info": {"name": ":00:01.0", "nb_vchans": 1, + "numa_node": 0}} + +#. 
Get the statistics for a particular DMA device and virtual DMA channel:: + + --> /dmadev/stats,0,0 + {"/dmadev/stats": {"submitted": 0, "completed": 0, + "errors": 0}} + +For more information on how to use the Telemetry interface, see +the :doc:`../howto/telemetry`. diff --git a/doc/guides/rel_notes/release_22_07.rst b/doc/guides/rel_notes/release_22_07.rst index 42a5f2d990..5a236b45ae 100644 --- a/doc/guides/rel_notes/release_22_07.rst +++ b/doc/guides/rel_notes/release_22_07.rst @@ -55,6 +55,10 @@ New Features Also, make sure to start the actual text at the margin. === +* **Added telemetry callbacks to dmadev library.** + + Added telemetry callback functions which allow for a list of DMA devices, + stats for a DMA device, and other DMA device information to be queried. Removed Items - diff --git a/lib/dmadev/meson.build b/lib/dmadev/meson.build index d2fc85e8c7..2f17587b75 100644 --- a/lib/dmadev/meson.build +++ b/lib/dmadev/meson.build @@ -5,3 +5,5 @@ sources = files('rte_dmadev.c') headers = files('rte_dmadev.h') indirect_headers += files('rte_dmadev_core.h') driver_sdk_headers += files('rte_dmadev_pmd.h') + +deps += ['telemetry'] diff --git a/lib/dmadev/rte_dmadev.c b/lib/dmadev/rte_dmadev.c index d4b32b2971..2f068cb9d8 100644 --- a/lib/dmadev/rte_dmadev.c +++ b/lib/dmadev/rte_dmadev.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "rte_dmadev.h" #include "rte_dmadev_pmd.h" @@ -864,3 +865,107 @@ dma_fp_object_dummy(struct rte_dma_fp_object *obj) obj->completed_status = dummy_completed_status; obj->burst_capacity = dummy_burst_capacity; } + +static int +dmadev_handle_dev_list(const char *cmd __rte_unused, + const char *params __rte_unused, + struct rte_tel_data *d) +{ + int dev_id; + + if (rte_dma_count_avail() == 0) + return -EINVAL; + + rte_tel_data_start_array(d, RTE_TEL_INT_VAL); + for (dev_id = 0; dev_id < dma_devices_max; dev_id++) + if (rte_dma_is_valid(dev_id)) + rte_tel_data_add_array_int(d, dev_id); + + return 0; +} + +static int 
+dmadev_handle_dev_info(const char *cmd __rte_unused, + const char *params, struct rte_tel_data *d) +{ + struct rte_dma_info dma_info; + int dev_id, ret; + char *end_param; + + if (params == NULL || strlen(params) == 0 || !isdigit(*params)) + return -EINVAL; + + dev_id = strtoul(params, &end_param, 0); + if (*end_param != '\0') + RTE_DMA_LOG(ERR, + "Extra parameters passed to dmadev telemetry command, ignoring"); + + if (!rte_dma_is_valid(dev_id)) + return -EINVAL; + + ret = rte_dma_info_get(dev_id, &dma_info); + if (ret < 0) + return -EINVAL; + + rte_tel_data_start_dict(d); + rte_tel_data_add_dict_string(d, "n
Re: [PATCH 2/6] net/hns3: fix inconsistent enabled RSS behavior
23/03/2022 12:04, lihuisong (C): > 在 2022/3/23 17:14, Thomas Monjalon 写道: > > 23/03/2022 04:05, lihuisong (C): > >> 在 2022/3/23 1:13, Thomas Monjalon 写道: > >>> 21/03/2022 08:14, lihuisong (C): > 2022/3/10 16:08, lihuisong (C): > > 2022/3/9 17:55, Ori Kam: > >> From: lihuisong (C) > >>> 2022/3/3 10:47, lihuisong (C): > 2022/3/2 22:07, Ori Kam: > > From: lihuisong (C) > >> 2022/3/1 0:42, Ferruh Yigit: > >>> On 2/28/2022 3:21 AM, Min Hu (Connor) wrote: > From: Huisong Li > > RSS will not be enabled if the RTE_ETH_MQ_RX_RSS_FLAG isn't be > set in > dev_configure phase. However, if this flag isn't set, RSS can be > enabled > through the ethdev ops and rte_flow API. This behavior is > contrary to > each > other. > > Fixes: c37ca66f2b27 ("net/hns3: support RSS") > Cc: sta...@dpdk.org > > Signed-off-by: Huisong Li > >>> Hi Huisong, Connor, > >>> > >>> Let's get a little more feedback for this patch, cc'ed more > >>> people. > >>> > >>> To enable RSS, multi queue mode should be set to > >>> 'RTE_ETH_MQ_RX_RSS_FLAG'. > >>> > >>> But I wonder if it is required to configure RSS via flow API, > >> I do not know the original purpose of adding the RSS > >> configuration in > >> flow API. > >> > > The purpose is simple, this allow to create RSS per rule and not a > > global one. > > For example create RSS that sends TCP to some queues while othe RSS > > will send > > UDP traffic to different queues. > I'm a little confused now. The "per rule" also seems to be a global > configuration. > Example: > - start PMD with 0,1,2,3 > - create TCP packets to 2,3 queues. At this moment, only 2,3 > queues > can be received for other types of packets. > Because this rule is implemented by modifying the entry of the > redirection table which is global for this device. > >>> Hi, Ori and Stephen. > >>> Can you help me clear up the confusion above? If some NICs behave like > >>> this, what should we do about it? 
> >> I'm not sure I understand the issue, maybe it is releated to some > >> HW/PMD limitation. > >> In your example non TCP traffic will be routed to one of the 4 queues > >> (0,1,2,3), > >> While TCP traffic will only be routed to queues 2,3. > >> > >> Now I can add new rule that matches on UDP packet and RSS to queue 0 > >> and 3 in this case: > >> TCP packets will be routed to queues 0,3. > >> UDP packets will be routed to queues 2,3. > >> All the rest of the traffic will be routed to queues 0,1,2,3 > >> > >> And just to be clear if now I add a rule to match all packets in > >> higher priority, > >> with RSS to queues 1,2. Then all traffic will be routed to queues 1,2. > >> > >> At least this is what is expected, from API point of view. > >> > >> Best, > >> Ori > > Thank you for your answer. I understand it. > > hns3 PMD cannot implement the above functions due to hardware > > limitation. > > we may need add a check that specified RSS queues cannot be supported > > when specified packets types. > > And only the packet type is specified, which meets the requirements of > > rte_flow API. > > The check for the RTE_ETH_MQ_RX_RSS_FLAG flag in rte_flow is not > > correct. > > Thanks, Ori and Stephen😁 > > > > But, I think, it is necessary for the '.rss_hash_update' and > > '.reta_update' APIs > > in eth_dev_ops to verify this flag. What do you think? @Thomas, > > @Ferruh, @Ori and @Stephen. > What's your take on it? I am looking forward to your reply. Thanks! > >>> I am not sure why you want to check this flag. > >> I want to make sure that the behavior that PMD configured RSS is > >> consistent across different interfaces. The RTE_ETH_MQ_RX_RSS_FLAG > >> flag is a switch to enable RSS hash. If the switch isn't open, some > >> PMD do not configure RSS function. I think the consistency is necessary. > >> If not set RSS muti-queue mode, it is unnecessary to configure RSS. 
> >> > >>> I can imagine we configure the hash and the table before enabling RSS > >>> with the RTE_ETH_MQ_RX_RSS_FLAG flag. > >> The flag is derived from dev_configure() which also configures > >> hash and key. I don't think it makes sense to configure hash and > >> reta before calling dev_configure. Because they'll be updated. > >> This is similar to configuring mtu. > > OK I see your point. > > So you would like to return an error in RSS functions > > if the flag RTE_ETH_MQ_RX_RSS_FLAG is not set? > Yes > > Should it be checked in ethde
Re: [PATCH 2/6] net/hns3: fix inconsistent enabled RSS behavior
On Wed, Mar 23, 2022 at 12:04 PM Thomas Monjalon wrote: > > 23/03/2022 12:04, lihuisong (C): > > 在 2022/3/23 17:14, Thomas Monjalon 写道: > > > 23/03/2022 04:05, lihuisong (C): > > >> 在 2022/3/23 1:13, Thomas Monjalon 写道: > > >>> 21/03/2022 08:14, lihuisong (C): > > 2022/3/10 16:08, lihuisong (C): > > > 2022/3/9 17:55, Ori Kam: > > >> From: lihuisong (C) > > >>> 2022/3/3 10:47, lihuisong (C): > > 2022/3/2 22:07, Ori Kam: > > > From: lihuisong (C) > > >> 2022/3/1 0:42, Ferruh Yigit: > > >>> On 2/28/2022 3:21 AM, Min Hu (Connor) wrote: > > From: Huisong Li > > > > RSS will not be enabled if the RTE_ETH_MQ_RX_RSS_FLAG isn't be > > set in > > dev_configure phase. However, if this flag isn't set, RSS can > > be > > enabled > > through the ethdev ops and rte_flow API. This behavior is > > contrary to > > each > > other. > > > > Fixes: c37ca66f2b27 ("net/hns3: support RSS") > > Cc: sta...@dpdk.org > > > > Signed-off-by: Huisong Li > > >>> Hi Huisong, Connor, > > >>> > > >>> Let's get a little more feedback for this patch, cc'ed more > > >>> people. > > >>> > > >>> To enable RSS, multi queue mode should be set to > > >>> 'RTE_ETH_MQ_RX_RSS_FLAG'. > > >>> > > >>> But I wonder if it is required to configure RSS via flow API, > > >> I do not know the original purpose of adding the RSS > > >> configuration in > > >> flow API. > > >> > > > The purpose is simple, this allow to create RSS per rule and not a > > > global one. > > > For example create RSS that sends TCP to some queues while othe > > > RSS > > > will send > > > UDP traffic to different queues. > > I'm a little confused now. The "per rule" also seems to be a global > > configuration. > > Example: > > - start PMD with 0,1,2,3 > > - create TCP packets to 2,3 queues. At this moment, only 2,3 > > queues > > can be received for other types of packets. > > Because this rule is implemented by modifying the entry of the > > redirection table which is global for this device. > > >>> Hi, Ori and Stephen. 
> > >>> Can you help me clear up the confusion above? If some NICs behave > > >>> like > > >>> this, what should we do about it? > > >> I'm not sure I understand the issue, maybe it is releated to some > > >> HW/PMD limitation. > > >> In your example non TCP traffic will be routed to one of the 4 queues > > >> (0,1,2,3), > > >> While TCP traffic will only be routed to queues 2,3. > > >> > > >> Now I can add new rule that matches on UDP packet and RSS to queue 0 > > >> and 3 in this case: > > >> TCP packets will be routed to queues 0,3. > > >> UDP packets will be routed to queues 2,3. > > >> All the rest of the traffic will be routed to queues 0,1,2,3 > > >> > > >> And just to be clear if now I add a rule to match all packets in > > >> higher priority, > > >> with RSS to queues 1,2. Then all traffic will be routed to queues > > >> 1,2. > > >> > > >> At least this is what is expected, from API point of view. > > >> > > >> Best, > > >> Ori > > > Thank you for your answer. I understand it. > > > hns3 PMD cannot implement the above functions due to hardware > > > limitation. > > > we may need add a check that specified RSS queues cannot be supported > > > when specified packets types. > > > And only the packet type is specified, which meets the requirements of > > > rte_flow API. > > > The check for the RTE_ETH_MQ_RX_RSS_FLAG flag in rte_flow is not > > > correct. > > > Thanks, Ori and Stephen😁 > > > > > > But, I think, it is necessary for the '.rss_hash_update' and > > > '.reta_update' APIs > > > in eth_dev_ops to verify this flag. What do you think? @Thomas, > > > @Ferruh, @Ori and @Stephen. > > What's your take on it? I am looking forward to your reply. Thanks! > > >>> I am not sure why you want to check this flag. > > >> I want to make sure that the behavior that PMD configured RSS is > > >> consistent across different interfaces. The RTE_ETH_MQ_RX_RSS_FLAG > > >> flag is a switch to enable RSS hash. 
If the switch isn't open, some > > >> PMD do not configure RSS function. I think the consistency is necessary. > > >> If not set RSS muti-queue mode, it is unnecessary to configure RSS. > > >> > > >>> I can imagine we configure the hash and the table before enabling RSS > > >>> with the RTE_ETH_MQ_RX_RSS_FLAG flag. > > >> The flag is derived from dev_configure() which also configures > > >> hash and key. I don'
[PATCH v3] net/ixgbe: Retry SFP ID read field to handle misbehaving SFPs
From: Stephen Douthit Some XGS-PON SFPs have been observed ACKing I2C reads and returning uninitialized garbage while their uC boots. This can lead to the SFP ID code marking an otherwise working SFP module as unsupported if a bogus ID value is read while its internal PHY/microcontroller is still booting. Retry the ID read several times looking not just for NAK, but also for a valid ID field. Since the device isn't NAKing the transaction, the existing longer retry code in ixgbe_read_i2c_byte_generic_int() doesn't apply here. Signed-off-by: Stephen Douthit Signed-off-by: Jeff Daly --- Notes: v2: * Removed superfluous DEBUGOUT * Renamed id_reads to retries * Don't assume status == 0 means IXGBE_SUCCESS v3: * Removed extra braces around single statement if drivers/net/ixgbe/base/ixgbe_phy.c | 27 --- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/drivers/net/ixgbe/base/ixgbe_phy.c b/drivers/net/ixgbe/base/ixgbe_phy.c index 8d4d9bbfef..74c5db16fa 100644 --- a/drivers/net/ixgbe/base/ixgbe_phy.c +++ b/drivers/net/ixgbe/base/ixgbe_phy.c @@ -1267,6 +1267,7 @@ s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw) u8 cable_tech = 0; u8 cable_spec = 0; u16 enforce_sfp = 0; + u8 retries; DEBUGFUNC("ixgbe_identify_sfp_module_generic"); @@ -1279,9 +1280,29 @@ s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw) /* LAN ID is needed for I2C access */ hw->mac.ops.set_lan_id(hw); - status = hw->phy.ops.read_i2c_eeprom(hw, -IXGBE_SFF_IDENTIFIER, -&identifier); + /* Need to check this a couple of times for a sane value. +* +* SFPs that have a uC slaved to the I2C bus (vs. a dumb EEPROM) can be +* poorly designed such that they will ACK I2C reads and return +* whatever bogus data is in the SRAM (or whatever is backing the target +* device) before things are truly initialized. +* +* In a perfect world devices would NAK I2C requests until they were +* sane, but here we are. 
+* +* Give such devices a couple tries to get their act together before +* marking the device as unsupported. +*/ + for (retries = 0; retries < 5; retries++) { + status = hw->phy.ops.read_i2c_eeprom(hw, +IXGBE_SFF_IDENTIFIER, +&identifier); + + DEBUGOUT("status %d, SFF identifier 0x%x\n", status, identifier); + if (status == IXGBE_SUCCESS && + identifier == IXGBE_SFF_IDENTIFIER_SFP) + break; + } if (status != IXGBE_SUCCESS) goto err_read_i2c_eeprom; -- 2.25.1
Minutes of Technical Board Meeting, 2022-03-09
Members Attending: 2022-03-09 - Aaron Conole - Bruce Richardson - Ferruh Yigit - Hemant Agrawal - Honnappa Nagarahalli (Host) - Jerin Jacob - Kevin Traynor - Konstantin Ananyev - Maxime Coquelin - Olivier Matz - Stephen Hemminger - Thomas Monjalon NOTE: The Technical Board meetings take place every second Wednesday on https://meet.jit.si/DPDK at 3 pm UTC. Meetings are public, and DPDK community members are welcome to attend. Agenda and minutes can be found at http://core.dpdk.org/techboard/minutes NOTE: Next meeting will be on Wednesday 2022-03-23 @3pm UTC, and will be chaired by Aaron. 1) Index based mempool lcore cache a) Limitations - Supports 4GB of contiguous memory for buffers b) 4GB of buffer space is enough in some use cases such as smart NICs and embedded appliances c) L3fwd application does not show performance improvements. Another way to view this is that the same throughput can be achieved with fewer resources d) Mempool performance unit test shows an improvement of ~13% for bulk size of 32 on Arm platforms. On x86 there is a performance degradation, probably due to the patch not being optimized for x86 e) Suggestion is to show performance improvement possibly with other apps such as IPSec gateway which would place more pressure on cache. 2) GB meeting updates a) The outstanding security issues presented to GB are not critical. Plan to address 2 CVEs in the coming release. 
b) Jerin (or whoever is the TB rep) will sync up with the community on outstanding CVEs before the next GB meeting c) DTS GPL license files - look for re-writing the files, files with alternative license, keeping the files outside of the repo (use wget) d) GB needs more input from TB on ecosystem health 3) Python bindings a) Link to the presentation: https://docs.google.com/presentation/d/1PgOFo4SqjWehH_YvVkGwij1XdPGIMxEPDINUI772Yb8/edit#slide=id.p b) Owen to create a small RFC to provide a feel for changes required c) One of the concerns is the maintenance overhead these bindings will introduce Thanks, Honnappa
RE: [PATCH] net/netvsc: fix the calculation of checksums based on mbuf flag
> Subject: Re: [PATCH] net/netvsc: fix the calculation of checksums based on > mbuf flag > > On Wed, Mar 23, 2022 at 5:28 AM wrote: > > > > From: Long Li > > > > The netvsc should use RTE_MBUF_F_TX_L4_MASK and check the value to > > decide the correct way to calculate checksums. > > It's better to describe what the impact for an application is. > Do I understand correctly that UDP checksum offloading was broken? > > > I guess you want this backported. > Fixes: 4e9c73e96e83 ("net/netvsc: add Hyper-V network device") > Cc: sta...@dpdk.org I'm sending V2 with the Fixes tag. Thanks, Long > > > > > > Signed-off-by: Long Li > > --- > > drivers/net/netvsc/hn_rxtx.c | 13 + > > 1 file changed, 9 insertions(+), 4 deletions(-) > > > > diff --git a/drivers/net/netvsc/hn_rxtx.c > > b/drivers/net/netvsc/hn_rxtx.c index 028f176c7e..34f40be5b8 100644 > > --- a/drivers/net/netvsc/hn_rxtx.c > > +++ b/drivers/net/netvsc/hn_rxtx.c > > @@ -1348,8 +1348,11 @@ static void hn_encap(struct rndis_packet_msg > *pkt, > > *pi_data = NDIS_LSO2_INFO_MAKEIPV4(hlen, > >m->tso_segsz); > > } > > - } else if (m->ol_flags & > > - (RTE_MBUF_F_TX_TCP_CKSUM | > RTE_MBUF_F_TX_UDP_CKSUM | RTE_MBUF_F_TX_IP_CKSUM)) { > > + } else if ((m->ol_flags & RTE_MBUF_F_TX_L4_MASK) == > > + RTE_MBUF_F_TX_TCP_CKSUM || > > + (m->ol_flags & RTE_MBUF_F_TX_L4_MASK) == > > + RTE_MBUF_F_TX_UDP_CKSUM || > > + (m->ol_flags & RTE_MBUF_F_TX_IP_CKSUM)) { > > pi_data = hn_rndis_pktinfo_append(pkt, > NDIS_TXCSUM_INFO_SIZE, > > NDIS_PKTINFO_TYPE_CSUM); > > *pi_data = 0; > > @@ -1363,9 +1366,11 @@ static void hn_encap(struct rndis_packet_msg > *pkt, > > *pi_data |= NDIS_TXCSUM_INFO_IPCS; > > } > > > > - if (m->ol_flags & RTE_MBUF_F_TX_TCP_CKSUM) > > + if ((m->ol_flags & RTE_MBUF_F_TX_L4_MASK) == > > + RTE_MBUF_F_TX_TCP_CKSUM) > > *pi_data |= NDIS_TXCSUM_INFO_MKTCPCS(hlen); > > - else if (m->ol_flags & RTE_MBUF_F_TX_UDP_CKSUM) > > + else if ((m->ol_flags & RTE_MBUF_F_TX_L4_MASK) == > > + RTE_MBUF_F_TX_UDP_CKSUM) > > *pi_data |= 
NDIS_TXCSUM_INFO_MKUDPCS(hlen); > > } > > > > -- > > 2.32.0 > > > > > -- > David Marchand
[Bug 976] rte_rib (and rte_rib6) do not handle /0 correctly
https://bugs.dpdk.org/show_bug.cgi?id=976 Bug ID: 976 Summary: rte_rib (and rte_rib6) do not handle /0 correctly Product: DPDK Version: 21.11 Hardware: All OS: All Status: UNCONFIRMED Severity: normal Priority: Normal Component: other Assignee: dev@dpdk.org Reporter: step...@networkplumber.org Target Milestone: --- The function rte_rib_insert() allows inserting 0/0 as a default route, but it is not correctly handled by the current tree code. For example lookups will never match the default route and tree traversal never finds this default route. Same bug probably exists in rte_rib6 -- You are receiving this mail because: You are the assignee for the bug.
Re: [Bug 976] rte_rib (and rte_rib6) do not handle /0 correctly
On Wed, 23 Mar 2022 22:46:02 + bugzi...@dpdk.org wrote: > https://bugs.dpdk.org/show_bug.cgi?id=976 > > Bug ID: 976 >Summary: rte_rib (and rte_rib6) do not handle /0 correctly >Product: DPDK >Version: 21.11 > Hardware: All > OS: All > Status: UNCONFIRMED > Severity: normal > Priority: Normal > Component: other > Assignee: dev@dpdk.org > Reporter: step...@networkplumber.org > Target Milestone: --- > > The function rte_rib_insert() allows inserting 0/0 as a default route, but it > is not correctly handled by the current tree code. For example lookups will > never match the default route and tree traversal never finds this default > route. > > Same bug probably exists in rte_rib6 > Here is a patch to existing RIB test that tests boundary conditions. It shows that /0 and /32 work correctly for lookup, it is just the tree traversal that is problematic. diff --git a/app/test/test_rib.c b/app/test/test_rib.c index 06058f8f7c52..403fc85efe95 100644 --- a/app/test/test_rib.c +++ b/app/test/test_rib.c @@ -307,6 +307,79 @@ test_basic(void) return TEST_SUCCESS; } +/* + * Call insert for successive depths from 0 to 32 + * and then make sure we get the most specific rule. 
+ */ +static int32_t +test_depth(void) +{ + struct rte_rib *rib = NULL; + struct rte_rib_node *node; + const struct rte_rib_conf config = { + .max_nodes = MAX_RULES, + }; + const uint32_t ip = RTE_IPV4(192, 18, 10, 1); + uint64_t next_hop_add = 0; + uint64_t next_hop_return; + uint8_t depth; + int ret; + + rib = rte_rib_create(__func__, SOCKET_ID_ANY, &config); + RTE_TEST_ASSERT(rib != NULL, "Failed to create RIB\n"); + + for (depth = 0; depth <= MAX_DEPTH; depth++) { + node = rte_rib_insert(rib, ip, depth); + RTE_TEST_ASSERT(node != NULL, "Failed to insert rule\n"); + + ret = rte_rib_set_nh(node, next_hop_add); + RTE_TEST_ASSERT(ret == 0, + "Failed to set rte_rib_node field\n"); + + node = rte_rib_lookup_exact(rib, ip, depth); + RTE_TEST_ASSERT(node != NULL, + "Failed to lookup\n"); + + ret = rte_rib_get_nh(node, &next_hop_return); + RTE_TEST_ASSERT((ret == 0) && (next_hop_add == next_hop_return), + "Failed to get proper nexthop\n"); + ++next_hop_add; + } + + /* depth = 33 = MAX_DEPTH + 1 */ + do { + uint32_t this_ip; + uint8_t this_depth; + + --depth; + + node = rte_rib_lookup(rib, ip); + RTE_TEST_ASSERT(node != NULL, "Failed to lookup\n"); + + ret = rte_rib_get_nh(node, &next_hop_return); + RTE_TEST_ASSERT((ret == 0) && (depth == next_hop_return), + "Failed to get proper nexthop\n"); + + ret = rte_rib_get_depth(node, &this_depth); + RTE_TEST_ASSERT((ret == 0) && (this_depth == depth), + "Failed to get proper depth\n"); + + ret = rte_rib_get_ip(node, &this_ip); + RTE_TEST_ASSERT(ret == 0, "Failed to get ip\n"); + + rte_rib_remove(rib, this_ip, this_depth); + } while (depth != 0); + + /* all rules removed should return NULL now */ + node = rte_rib_lookup(rib, ip); + RTE_TEST_ASSERT(node == NULL, + "Lookup returns non existent rule\n"); + + rte_rib_free(rib); + + return TEST_SUCCESS; +} + int32_t test_tree_traversal(void) { @@ -314,9 +387,17 @@ test_tree_traversal(void) struct rte_rib_node *node; struct rte_rib_conf config; - uint32_t ip1 = RTE_IPV4(10, 10, 10, 0); 
- uint32_t ip2 = RTE_IPV4(10, 10, 130, 80); - uint8_t depth = 30; + uint32_t ips[] = { + RTE_IPV4(0, 0, 0, 0), /* /0 */ + RTE_IPV4(10, 10, 0, 0), /* /8 */ + RTE_IPV4(10, 11, 0, 0), /* /16 */ + RTE_IPV4(10, 10, 130, 0), /* /24 */ + RTE_IPV4(10, 10, 130, 9), /* /32 */ + }; + unsigned int count; + uint32_t ip; + uint8_t depth; + int ret; config.max_nodes = MAX_RULES; config.ext_sz = 0; @@ -324,16 +405,44 @@ test_tree_traversal(void) rib = rte_rib_create(__func__, SOCKET_ID_ANY, &config); RTE_TEST_ASSERT(rib != NULL, "Failed to create RIB\n"); - node = rte_rib_insert(rib, ip1, depth); - RTE_TEST_ASSERT(node != NULL, "Failed to insert rule\n"); + for (count = 0; count < RTE_DIM(ips); count++) { + depth = count * 8; - node = rte_rib_insert(rib, ip2, depth); - RTE_TEST_ASSERT(node != NULL, "Faile
RE: [PATCH v3] net/ixgbe: Retry SFP ID read field to handle misbehaving SFPs
> -Original Message- > From: je...@silicom-usa.com > Sent: Thursday, March 24, 2022 04:04 > To: dev@dpdk.org > Cc: Stephen Douthit ; Daly, Jeff > ; Wang, Haiyue > > Subject: [PATCH v3] net/ixgbe: Retry SFP ID read field to handle misbehaving > SFPs > > From: Stephen Douthit > > Some XGS-PON SFPs have been observed ACKing I2C reads and returning > uninitialized garbage while their uC boots. This can lead to the SFP ID > code marking an otherwise working SFP module as unsupported if a bogus > ID value is read while its internal PHY/microcontroller is still > booting. > > Retry the ID read several times looking not just for NAK, but also for a > valid ID field. > > Since the device isn't NAKing the transaction the existing longer retry > code in ixgbe_read_i2c_byte_generic_int() doesn't apply here. > > Signed-off-by: Stephen Douthit > Signed-off-by: Jeff Daly > --- > > Notes: > v2: > * Removed superfluous DEBUGOUT > * Renamed id_reads to retries > * Don't assume status == 0 means IXGBE_SUCCESS > > v3: > * Removed extra braces around single statement if > > drivers/net/ixgbe/base/ixgbe_phy.c | 27 --- > 1 file changed, 24 insertions(+), 3 deletions(-) > Thanks! Reviewed-by: Haiyue Wang > -- > 2.25.1
[Bug 977] [dpdk-19.11.12-rc1] meson and make build Error on Fedora35-64 and Ubuntu2110-64 with gcc11.2.1 and gcc11.2.0
https://bugs.dpdk.org/show_bug.cgi?id=977 Bug ID: 977 Summary: [dpdk-19.11.12-rc1] meson and make build Error on Fedora35-64 and Ubuntu2110-64 with gcc11.2.1 and gcc11.2.0 Product: DPDK Version: 19.11 Hardware: All OS: All Status: UNCONFIRMED Severity: normal Priority: Normal Component: core Assignee: dev@dpdk.org Reporter: daxuex@intel.com Target Milestone: --- [DPDK version]: c511bb7787 (HEAD, tag: v19.11.12-rc1, origin/19.11) [OS version]: Fedora 35-64/5.14.16-301.fc35.x86_64 GCC Version: 11.2.1 20210728 (Red Hat 11.2.1-1) UB2110-64 /5.13.0-19-generic GCC Version: gcc (Ubuntu 11.2.0-7ubuntu2) 11.2.0 [Make Test Setup]: export RTE_TARGET=x86_64-native-linuxapp-gcc export RTE_SDK=`pwd` echo "CONFIG_RTE_EAL_IGB_UIO=y" >> config/common_base echo "CONFIG_RTE_LIBRTE_BNX2X_PMD=y" >> config/common_base echo "CONFIG_RTE_LIBRTE_PMD_PCAP=y" >> config/common_base echo "CONFIG_RTE_LIBRTE_PMD_QAT=y" >> config/common_base echo "CONFIG_RTE_LIBRTE_PMD_QAT_SYM=y" >> config/common_base echo "CONFIG_RTE_LIBRTE_PMD_AESNI_MB=y" >> config/common_base echo "CONFIG_RTE_LIBRTE_PMD_OPENSSL=y" >> config/common_base make -j 20 install T=x86_64-native-linuxapp-gcc [Meson Test setup] CC=gcc meson --werror -Denable_kmods=True -Dlibdir=lib -Dexamples=all --default-library=static x86_64-native-linuxapp-gcc ninja -j 10 -C x86_64-native-linuxapp-gcc [UB2110 Make log as below] In function ‘snprintf’, inlined from ‘rte_strlcpy’ at /root/UB2110-64_K5.13.0_GCC11.2.0/x86_64-native-linuxapp-gcc/20220323141024/dpdk/x86_64-native-linuxapp-gcc/include/rte_string_fns.h:61:17, inlined from ‘bnxt_dev_xstats_get_names_op’ at /root/UB2110-64_K5.13.0_GCC11.2.0/x86_64-native-linuxapp-gcc/20220323141024/dpdk/drivers/net/bnxt/bnxt_stats.c:632:2: /usr/include/x86_64-linux-gnu/bits/stdio2.h:71:10: error: ‘__builtin_memcpy’ offset [0, 12] is out of the bounds [0, 0] [-Werror=array-bounds] 71 | return __builtin___snprintf_chk (__s, __n, __USE_FORTIFY_LEVEL - 1, | ^~~~ 72 |__glibc_objsize (__s), __fmt, -- 73 
|__va_arg_pack ()); |~ cc1: all warnings being treated as errors make[6]: *** [/root/UB2110-64_K5.13.0_GCC11.2.0/x86_64-native-linuxapp-gcc/20220323141024/dpdk/mk/internal/rte.compile-pre.mk:116: bnxt_stats.o] Error 1 make[6]: *** Waiting for unfinished jobs CC tap_flow.o make[5]: *** [/root/UB2110-64_K5.13.0_GCC11.2.0/x86_64-native-linuxapp-gcc/20220323141024/dpdk/mk/rte.subdir.mk:35: bnxt] Error 2 make[5]: *** Waiting for unfinished jobs CC sfc_flow.o CC ixgbe_rxtx_vec_sse.o -- CC medford2_nic.o AR librte_pmd_sfc_efx.a INSTALL-LIB librte_pmd_sfc_efx.a make[4]: *** [/root/UB2110-64_K5.13.0_GCC11.2.0/x86_64-native-linuxapp-gcc/20220323141024/dpdk/mk/rte.subdir.mk:35: net] Error 2 make[3]: *** [/root/UB2110-64_K5.13.0_GCC11.2.0/x86_64-native-linuxapp-gcc/20220323141024/dpdk/mk/rte.sdkbuild.mk:46: drivers] Error 2 make[2]: *** [/root/UB2110-64_K5.13.0_GCC11.2.0/x86_64-native-linuxapp-gcc/20220323141024/dpdk/mk/rte.sdkroot.mk:99: all] Error 2 make[1]: *** [/root/UB2110-64_K5.13.0_GCC11.2.0/x86_64-native-linuxapp-gcc/20220323141024/dpdk/mk/rte.sdkinstall.mk:61: pre_install] Error 2 make: *** [/root/UB2110-64_K5.13.0_GCC11.2.0/x86_64-native-linuxapp-gcc/20220323141024/dpdk/mk/rte.sdkroot.mk:77: install] Error 2 DPDK STV team [Fedora Make log as below] In file included from /root/FC35-64_K5.14.16_GCC11.2.1/x86_64-native-linuxapp-gcc/20220323141024/dpdk/drivers/net/bnxt/bnxt_stats.c:8: In function ‘rte_strlcpy’, inlined from ‘bnxt_dev_xstats_get_names_op’ at /root/FC35-64_K5.14.16_GCC11.2.1/x86_64-native-linuxapp-gcc/20220323141024/dpdk/drivers/net/bnxt/bnxt_stats.c:632:2: /root/FC35-64_K5.14.16_GCC11.2.1/x86_64-native-linuxapp-gcc/20220323141024/dpdk/x86_64-native-linuxapp-gcc/include/rte_string_fns.h:61:24: error: ‘__builtin_memcpy’ offset [0, 12] is out of the bounds [0, 0] [-Werror=array-bounds] 61 | return (size_t)snprintf(dst, size, "%s", src); |^~ cc1: all warnings being treated as errors make[6]: *** 
[/root/FC35-64_K5.14.16_GCC11.2.1/x86_64-native-linuxapp-gcc/20220323141024/dpdk/mk/internal/rte.compile-pre.mk:116: bnxt_stats.o] Error 1 make[5]: *** [/root/FC35-64_K5.14.16_GCC11.2.1/x86_64-native-linuxapp-gcc/20220323141024/dpdk/mk/rte.subdir.mk:35: bnxt] Error 2 make[5]: *** Waiting for unfinished jobs CC otx2_mcast.o PMDINFO rte_eth_kni.o.pmd.c -- CC medford2_nic.o AR librte_pmd_sfc_efx.a INSTALL-LIB librte_pmd_sfc_efx.a make[4]: *** [/root/FC35-64_K5.14.16_GCC11.2.1/x86_64-native-linuxapp-gcc/20220323141024/dpdk/mk/rte.subdir.mk:35: net] Error 2 ma
[PATCH] net/ice: fix error set of queue number
The queue number actually applied should be the largest power of 2 less than or equal to min(vsi->nb_qps, ICE_MAX_Q_PER_TC), so we need the position of the most significant 1 bit. However, rte_bsf32 returns the position of the least significant 1 bit. This patch replaces rte_bsf32 with rte_fls_u32 and adds the necessary boundary check. Signed-off-by: Wenjun Wu --- drivers/net/ice/ice_ethdev.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c index 13adcf90ed..73e550f5fb 100644 --- a/drivers/net/ice/ice_ethdev.c +++ b/drivers/net/ice/ice_ethdev.c @@ -808,7 +808,7 @@ ice_vsi_config_tc_queue_mapping(struct ice_vsi *vsi, struct ice_aqc_vsi_props *info, uint8_t enabled_tcmap) { - uint16_t bsf, qp_idx; + uint16_t fls, qp_idx; /* default tc 0 now. Multi-TC supporting need to be done later. * Configure TC and queue mapping parameters, for enabled TC, @@ -820,15 +820,15 @@ ice_vsi_config_tc_queue_mapping(struct ice_vsi *vsi, } vsi->nb_qps = RTE_MIN(vsi->nb_qps, ICE_MAX_Q_PER_TC); - bsf = rte_bsf32(vsi->nb_qps); + fls = (vsi->nb_qps == 0) ? 0 : rte_fls_u32(vsi->nb_qps) - 1; /* Adjust the queue number to actual queues that can be applied */ - vsi->nb_qps = 0x1 << bsf; + vsi->nb_qps = (vsi->nb_qps == 0) ? 0 : 0x1 << fls; qp_idx = 0; /* Set tc and queue mapping with VSI */ info->tc_mapping[0] = rte_cpu_to_le_16((qp_idx << ICE_AQ_VSI_TC_Q_OFFSET_S) | - (bsf << ICE_AQ_VSI_TC_Q_NUM_S)); + (fls << ICE_AQ_VSI_TC_Q_NUM_S)); /* Associate queue number with VSI */ info->mapping_flags |= rte_cpu_to_le_16(ICE_AQ_VSI_Q_MAP_CONTIG); -- 2.25.1
RE: DPDK seqlock
> > Hi Mattias, > > > > > Would it make sense to have a seqlock implementation in DPDK? I do not have any issues with adding the seqlock to DPDK. However, I am interested in understanding the use case. As I understand, seqlock is a type of reader-writer lock. This means that it is possible that readers (data plane) may be blocked till the writer completes the updates. Does not this mean, data plane might drop packets while the writer is updating entries? > > > > I think so, since it's a very useful synchronization primitive in data > > plane applications. > > > > Agree, it might be useful. > As I remember rte_hash '_lf' functions do use something similar to seqlock, > but > in hand-made manner. > Probably some other entities within DPDK itself or related projects will > benefit > from it too... > > Konstantin
Re: DPDK seqlock
On Thu, 24 Mar 2022 04:52:07 + Honnappa Nagarahalli wrote: > > > > > > Hi Mattias, > > > > > > > > Would it make sense to have a seqlock implementation in DPDK? > I do not have any issues with adding the seqlock to DPDK. > > However, I am interested in understanding the use case. As I understand, > seqlock is a type of reader-writer lock. This means that it is possible that > readers (data plane) may be blocked till the writer completes the updates. > Does not this mean, data plane might drop packets while the writer is > updating entries? > > > > > > > I think so, since it's a very useful synchronization primitive in data > > > plane applications. > > > > > > > Agree, it might be useful. > > As I remember rte_hash '_lf' functions do use something similar to seqlock, > > but > > in hand-made manner. > > Probably some other entities within DPDK itself or related projects will > > benefit > > from it too... > > > > Konstantin As the inventor of seqlock, I would say it is really just a kind of reader/writer spinlock where spinning tries to do useful work. It is useful for cases where the data being accessed is too large for __atomic primitives.
[PATCH] ethtool: correct format strings according to the arguments
The current format strings are not consistent with the types of the given arguments, so they are changed accordingly. Signed-off-by: huzaifa.rahman --- examples/ethtool/ethtool-app/main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/ethtool/ethtool-app/main.c b/examples/ethtool/ethtool-app/main.c index 1f011a9321..ea8332e49c 100644 --- a/examples/ethtool/ethtool-app/main.c +++ b/examples/ethtool/ethtool-app/main.c @@ -208,7 +208,7 @@ static int worker_main(__rte_unused void *ptr_data) &ptr_port->mac_addr); if (ret != 0) { rte_spinlock_unlock(&ptr_port->lock); - printf("Failed to get MAC address (port %u): %s", + printf("Failed to get MAC address (port %i): %s", ptr_port->idx_port, rte_strerror(-ret)); return ret; @@ -284,11 +284,11 @@ int main(int argc, char **argv) rte_exit(EXIT_FAILURE, "rte_eal_init(): Failed"); cnt_ports = rte_eth_dev_count_avail(); - printf("Number of NICs: %i\n", cnt_ports); + printf("Number of NICs: %u\n", cnt_ports); if (cnt_ports == 0) rte_exit(EXIT_FAILURE, "No available NIC ports!\n"); if (cnt_ports > MAX_PORTS) { - printf("Info: Using only %i of %i ports\n", + printf("Info: Using only %u of %i ports\n", cnt_ports, MAX_PORTS ); cnt_ports = MAX_PORTS; -- 2.25.1
[PATCH v1] net/ice: support 256 queues
256 queues can now be allowed. This patch updates the code to support 256 queues per PF. Signed-off-by: Wenjun Wu --- drivers/net/ice/ice_ethdev.c | 8 drivers/net/ice/ice_ethdev.h | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c index 13adcf90ed..73e550f5fb 100644 --- a/drivers/net/ice/ice_ethdev.c +++ b/drivers/net/ice/ice_ethdev.c @@ -808,7 +808,7 @@ ice_vsi_config_tc_queue_mapping(struct ice_vsi *vsi, struct ice_aqc_vsi_props *info, uint8_t enabled_tcmap) { - uint16_t bsf, qp_idx; + uint16_t fls, qp_idx; /* default tc 0 now. Multi-TC supporting need to be done later. * Configure TC and queue mapping parameters, for enabled TC, @@ -820,15 +820,15 @@ ice_vsi_config_tc_queue_mapping(struct ice_vsi *vsi, } vsi->nb_qps = RTE_MIN(vsi->nb_qps, ICE_MAX_Q_PER_TC); - bsf = rte_bsf32(vsi->nb_qps); + fls = (vsi->nb_qps == 0) ? 0 : rte_fls_u32(vsi->nb_qps) - 1; /* Adjust the queue number to actual queues that can be applied */ - vsi->nb_qps = 0x1 << bsf; + vsi->nb_qps = (vsi->nb_qps == 0) ? 
0 : 0x1 << fls; qp_idx = 0; /* Set tc and queue mapping with VSI */ info->tc_mapping[0] = rte_cpu_to_le_16((qp_idx << ICE_AQ_VSI_TC_Q_OFFSET_S) | - (bsf << ICE_AQ_VSI_TC_Q_NUM_S)); + (fls << ICE_AQ_VSI_TC_Q_NUM_S)); /* Associate queue number with VSI */ info->mapping_flags |= rte_cpu_to_le_16(ICE_AQ_VSI_Q_MAP_CONTIG); diff --git a/drivers/net/ice/ice_ethdev.h b/drivers/net/ice/ice_ethdev.h index 3ed580d438..09cfb60b0f 100644 --- a/drivers/net/ice/ice_ethdev.h +++ b/drivers/net/ice/ice_ethdev.h @@ -21,8 +21,8 @@ #define ICE_ADMINQ_BUF_SZ4096 #define ICE_SBIOQ_BUF_SZ 4096 #define ICE_MAILBOXQ_BUF_SZ 4096 -/* Number of queues per TC should be one of 1, 2, 4, 8, 16, 32, 64 */ -#define ICE_MAX_Q_PER_TC 64 +/* Number of queues per TC should be one of 1, 2, 4, 8, 16, 32, 64, 128, 256 */ +#define ICE_MAX_Q_PER_TC 256 #define ICE_NUM_DESC_DEFAULT 512 #define ICE_BUF_SIZE_MIN 1024 #define ICE_FRAME_SIZE_MAX 9728 -- 2.25.1