Re: [dpdk-dev] [PATCH 1/3] rte_ethdev: Add API function to read dev clock
Ferruh Yigit wrote : > Is this a common enough feature to include into ethdev abstraction layer? Or a > feature for a single vendor? I found reference to mbuf’s timestamp field only in MLX5. I think it is the only one to support timestamp offloading. This new API is only useful to make sense out of the timestamp value. And without this patch, timestamp offloading is completely useless… What would be the other way ? Define something in mlx5 header and ask clients to check for the driver and call the specific API ? I see reference to timestamp offloading in Netronome Agilio, CC-ing maintainers. Is timestamp offloading a feature you could potentially provide ? Would it be host time reference or a value that need conversion with an API like this? Tom
Re: [dpdk-dev] [PATCH] eal: fix core number validation
On Thu, Dec 20, 2018 at 11:01 AM Hari Kumar Vemula < hari.kumarx.vem...@intel.com> wrote: > When incorrect core value or range provided, > as part of -l command line option, a crash occurs. > > Added valid range checks to fix the crash. > > Fixes: d888cb8b9613 ("eal: add core list input format") > Cc: sta...@dpdk.org > > Signed-off-by: Hari Kumar Vemula > Thanks for reporting. I agree that some validation steps are missing, but I tried a little bit and did not reproduce a crash. On my 8 cores system: [dmarchan@dmarchan dpdk]$ ./master/app/testpmd --no-huge -l 567890,567891,567892 -m 512 --log-level *:debug -- -i --total-num-mbufs 2048 [...] EAL: Support maximum 128 logical core(s) by configuration. EAL: Detected 8 lcore(s) [...] EAL: invalid core list Usage: ./master/app/testpmd [options] etc... [dmarchan@dmarchan dpdk]$ ./master/app/testpmd --no-huge -l 2,3,-1 -m 512 --log-level *:debug -- -i --total-num-mbufs 2048 [...] EAL: Support maximum 128 logical core(s) by configuration. EAL: Detected 8 lcore(s) EAL: Detected 1 NUMA nodes [...] Done testpmd> Bye... Idem with [dmarchan@dmarchan dpdk]$ ./master/app/testpmd --no-huge -l 2,3,567890 -m 512 --log-level *:debug -- -i --total-num-mbufs 2048 [...] EAL: Support maximum 128 logical core(s) by configuration. EAL: Detected 8 lcore(s) EAL: Detected 1 NUMA nodes [...] Done testpmd> Bye... Since you have identified a potential crash, can you give an example of such a crash ? Besides, we have tests that check arguments, so an update of the test would be nice. Thanks. -- David Marchand
Re: [dpdk-dev] [PATCH v3 3/3] net/virtio: improve batching in mergeable path
On 12/21/18 7:27 AM, Gavin Hu (Arm Technology China) wrote: -Original Message- From: dev On Behalf Of Maxime Coquelin Sent: Friday, December 21, 2018 1:27 AM To: dev@dpdk.org; jfreim...@redhat.com; tiwei@intel.com; zhihong.w...@intel.com Cc: Maxime Coquelin Subject: [dpdk-dev] [PATCH v3 3/3] net/virtio: improve batching in mergeable path This patch improves both descriptors dequeue and refill, by using the same batching strategy as done in in-order path. Signed-off-by: Maxime Coquelin Tested-by: Jens Freimann Reviewed-by: Jens Freimann --- drivers/net/virtio/virtio_rxtx.c | 239 +-- 1 file changed, 129 insertions(+), 110 deletions(-) diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c index 58376ced3..1cfa2f0d6 100644 --- a/drivers/net/virtio/virtio_rxtx.c +++ b/drivers/net/virtio/virtio_rxtx.c @@ -353,41 +353,44 @@ virtqueue_enqueue_refill_inorder(struct virtqueue *vq, } static inline int -virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie) +virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf **cookie, + uint16_t num) { struct vq_desc_extra *dxp; struct virtio_hw *hw = vq->hw; - struct vring_desc *start_dp; - uint16_t needed = 1; - uint16_t head_idx, idx; + struct vring_desc *start_dp = vq->vq_ring.desc; + uint16_t idx, i; if (unlikely(vq->vq_free_cnt == 0)) return -ENOSPC; - if (unlikely(vq->vq_free_cnt < needed)) + if (unlikely(vq->vq_free_cnt < num)) return -EMSGSIZE; - head_idx = vq->vq_desc_head_idx; - if (unlikely(head_idx >= vq->vq_nentries)) + if (unlikely(vq->vq_desc_head_idx >= vq->vq_nentries)) return -EFAULT; - idx = head_idx; - dxp = &vq->vq_descx[idx]; - dxp->cookie = (void *)cookie; - dxp->ndescs = needed; + for (i = 0; i < num; i++) { + idx = vq->vq_desc_head_idx; + dxp = &vq->vq_descx[idx]; + dxp->cookie = (void *)cookie[i]; + dxp->ndescs = 1; - start_dp = vq->vq_ring.desc; - start_dp[idx].addr = - VIRTIO_MBUF_ADDR(cookie, vq) + - RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size; - start_dp[idx].len = - cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw- vtnet_hdr_size; - start_dp[idx].flags = VRING_DESC_F_WRITE; - idx = start_dp[idx].next; - vq->vq_desc_head_idx = idx; - if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) - vq->vq_desc_tail_idx = idx; - vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed); - vq_update_avail_ring(vq, head_idx); + start_dp[idx].addr = + VIRTIO_MBUF_ADDR(cookie[i], vq) + + RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size; + start_dp[idx].len = + cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM + + hw->vtnet_hdr_size; + start_dp[idx].flags = VRING_DESC_F_WRITE; + vq->vq_desc_head_idx = start_dp[idx].next; + vq_update_avail_ring(vq, idx); + if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) { + vq->vq_desc_tail_idx = vq->vq_desc_head_idx; + break; + } + } + + vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num); return 0; } @@ -892,7 +895,8 @@ virtio_dev_rx_queue_setup_finish(struct rte_eth_dev *dev, uint16_t queue_idx) error = virtqueue_enqueue_recv_refill_packed(vq, &m, 1); else - error = virtqueue_enqueue_recv_refill(vq, m); + error = virtqueue_enqueue_recv_refill(vq, + &m, 1); if (error) { rte_pktmbuf_free(m); break; @@ -991,7 +995,7 @@ virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m) if (vtpci_packed_queue(vq->hw)) error = virtqueue_enqueue_recv_refill_packed(vq, &m, 1); else - error = virtqueue_enqueue_recv_refill(vq, m); + error = virtqueue_enqueue_recv_refill(vq, &m, 1); if (unlikely(error)) { RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf"); @@ -1211,7 +1215,7 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) dev->data->rx_mbuf_alloc_failed++; break; } - error = virtqueue_enqueue_recv_refill(vq, new_mbuf); + error = virtqueue_enqueue_recv_refill(vq, &new_mbuf, 1); if (unlikely(error)) {
Re: [dpdk-dev] [PATCH v3 3/3] net/virtio: improve batching in mergeable path
> -Original Message- > From: Maxime Coquelin > Sent: Friday, December 21, 2018 4:36 PM > To: Gavin Hu (Arm Technology China) ; > dev@dpdk.org; jfreim...@redhat.com; tiwei@intel.com; > zhihong.w...@intel.com > Cc: nd > Subject: Re: [dpdk-dev] [PATCH v3 3/3] net/virtio: improve batching in > mergeable path > > > > On 12/21/18 7:27 AM, Gavin Hu (Arm Technology China) wrote: > > > > > >> -Original Message- > >> From: dev On Behalf Of Maxime Coquelin > >> Sent: Friday, December 21, 2018 1:27 AM > >> To: dev@dpdk.org; jfreim...@redhat.com; tiwei@intel.com; > >> zhihong.w...@intel.com > >> Cc: Maxime Coquelin > >> Subject: [dpdk-dev] [PATCH v3 3/3] net/virtio: improve batching in > >> mergeable path > >> > >> This patch improves both descriptors dequeue and refill, > >> by using the same batching strategy as done in in-order path. > >> > >> Signed-off-by: Maxime Coquelin > >> Tested-by: Jens Freimann > >> Reviewed-by: Jens Freimann > >> --- > >> drivers/net/virtio/virtio_rxtx.c | 239 +-- > >> 1 file changed, 129 insertions(+), 110 deletions(-) > >> > >> diff --git a/drivers/net/virtio/virtio_rxtx.c > b/drivers/net/virtio/virtio_rxtx.c > >> index 58376ced3..1cfa2f0d6 100644 > >> --- a/drivers/net/virtio/virtio_rxtx.c > >> +++ b/drivers/net/virtio/virtio_rxtx.c > >> @@ -353,41 +353,44 @@ virtqueue_enqueue_refill_inorder(struct > >> virtqueue *vq, > >> } > >> > >> static inline int > >> -virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf > >> *cookie) > >> +virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf > >> **cookie, > >> + uint16_t num) > >> { > >>struct vq_desc_extra *dxp; > >>struct virtio_hw *hw = vq->hw; > >> - struct vring_desc *start_dp; > >> - uint16_t needed = 1; > >> - uint16_t head_idx, idx; > >> + struct vring_desc *start_dp = vq->vq_ring.desc; > >> + uint16_t idx, i; > >> > >>if (unlikely(vq->vq_free_cnt == 0)) > >>return -ENOSPC; > >> - if (unlikely(vq->vq_free_cnt < needed)) > >> + if (unlikely(vq->vq_free_cnt < num)) > >>return -EMSGSIZE; > >> > >> - head_idx = vq->vq_desc_head_idx; > >> - if (unlikely(head_idx >= vq->vq_nentries)) > >> + if (unlikely(vq->vq_desc_head_idx >= vq->vq_nentries)) > >>return -EFAULT; > >> > >> - idx = head_idx; > >> - dxp = &vq->vq_descx[idx]; > >> - dxp->cookie = (void *)cookie; > >> - dxp->ndescs = needed; > >> + for (i = 0; i < num; i++) { > >> + idx = vq->vq_desc_head_idx; > >> + dxp = &vq->vq_descx[idx]; > >> + dxp->cookie = (void *)cookie[i]; > >> + dxp->ndescs = 1; > >> > >> - start_dp = vq->vq_ring.desc; > >> - start_dp[idx].addr = > >> - VIRTIO_MBUF_ADDR(cookie, vq) + > >> - RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size; > >> - start_dp[idx].len = > >> - cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw- > >>> vtnet_hdr_size; > >> - start_dp[idx].flags = VRING_DESC_F_WRITE; > >> - idx = start_dp[idx].next; > >> - vq->vq_desc_head_idx = idx; > >> - if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) > >> - vq->vq_desc_tail_idx = idx; > >> - vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed); > >> - vq_update_avail_ring(vq, head_idx); > >> + start_dp[idx].addr = > >> + VIRTIO_MBUF_ADDR(cookie[i], vq) + > >> + RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size; > >> + start_dp[idx].len = > >> + cookie[i]->buf_len - RTE_PKTMBUF_HEADROOM + > >> + hw->vtnet_hdr_size; > >> + start_dp[idx].flags = VRING_DESC_F_WRITE; > >> + vq->vq_desc_head_idx = start_dp[idx].next; > >> + vq_update_avail_ring(vq, idx); > >> + if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) > >> { > >> + vq->vq_desc_tail_idx = vq->vq_desc_head_idx; > >> + break; > >> + } > >> + } > >> + > >> + vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num); > >> > >>return 0; > >> } > >> @@ -892,7 +895,8 @@ virtio_dev_rx_queue_setup_finish(struct > >> rte_eth_dev *dev, uint16_t queue_idx) > >>error = > >> virtqueue_enqueue_recv_refill_packed(vq, > >>&m, 1); > >>else > >> - error = virtqueue_enqueue_recv_refill(vq, > >> m); > >> + error = virtqueue_enqueue_recv_refill(vq, > >> + &m, 1); > >>if (error) { > >>rte_pktmbuf_free(m); > >>break; > >> @@ -991,7 +995,7 @@ virtio_discard_rxbuf(struct virtqueue *vq, struct > >> rte_mbuf *m) > >>if (vtpci_packed_queue(vq->hw)) > >>error = virtqueue_enqueue_recv_refill_packed(vq, &m, 1); > >>else > >> - error = virtqueue_enqueue_recv_refill(vq, m); > >> +
[dpdk-dev] [PATCH] compress/qat: fix return correct status on qat overflow
This patch fixes correct status in case of overflow on QAT is detected. In that case RTE_COMP_OP_STATUS_OUT_OF_SPACE_TERMINATED value is set in rte_comp_op.status field instead of RTE_COMP_OP_STATUS_ERROR Fixes: 32842f2a6d7d ("compress/qat: create FW request and process response") Cc: sta...@dpdk.org Signed-off-by: Tomasz Jozwiak --- drivers/compress/qat/qat_comp.c | 34 +++--- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/drivers/compress/qat/qat_comp.c b/drivers/compress/qat/qat_comp.c index bb00610..4edf4c8 100644 --- a/drivers/compress/qat/qat_comp.c +++ b/drivers/compress/qat/qat_comp.c @@ -117,6 +117,9 @@ qat_comp_process_response(void **op, uint8_t *resp, uint64_t *dequeue_err_count) (resp_msg->opaque_data); struct qat_comp_xform *qat_xform = (struct qat_comp_xform *) (rx_op->private_xform); + int err = resp_msg->comn_resp.comn_status & + ((1 << QAT_COMN_RESP_CMP_STATUS_BITPOS) | +(1 << QAT_COMN_RESP_XLAT_STATUS_BITPOS)); #if RTE_LOG_DP_LEVEL >= RTE_LOG_DEBUG QAT_DP_LOG(DEBUG, "Direction: %s", @@ -140,21 +143,30 @@ qat_comp_process_response(void **op, uint8_t *resp, uint64_t *dequeue_err_count) } } - if ((ICP_QAT_FW_COMN_RESP_CMP_STAT_GET(resp_msg->comn_resp.comn_status) - | ICP_QAT_FW_COMN_RESP_XLAT_STAT_GET( - resp_msg->comn_resp.comn_status)) != - ICP_QAT_FW_COMN_STATUS_FLAG_OK) { - - if (unlikely((ICP_QAT_FW_COMN_RESP_XLAT_STAT_GET( - resp_msg->comn_resp.comn_status) != - ICP_QAT_FW_COMN_STATUS_FLAG_OK) && - (qat_xform->qat_comp_request_type - == QAT_COMP_REQUEST_DYNAMIC_COMP_STATELESS))) + if (err) { + if (unlikely((err & (1 << QAT_COMN_RESP_XLAT_STATUS_BITPOS)) +&& (qat_xform->qat_comp_request_type +== QAT_COMP_REQUEST_DYNAMIC_COMP_STATELESS))) { QAT_DP_LOG(ERR, "QAT intermediate buffer may be too " "small for output, try configuring a larger size"); + } + + int8_t cmp_err_code = + (int8_t)resp_msg->comn_resp.comn_error.cmp_err_code; + int8_t xlat_err_code = + (int8_t)resp_msg->comn_resp.comn_error.xlat_err_code; + + if ((cmp_err_code == ERR_CODE_OVERFLOW_ERROR && !xlat_err_code) + || + (!cmp_err_code && xlat_err_code == ERR_CODE_OVERFLOW_ERROR) + || + ((cmp_err_code | xlat_err_code) == ERR_CODE_OVERFLOW_ERROR)) + rx_op->status = + RTE_COMP_OP_STATUS_OUT_OF_SPACE_TERMINATED; + else + rx_op->status = RTE_COMP_OP_STATUS_ERROR; ++(*dequeue_err_count); - rx_op->status = RTE_COMP_OP_STATUS_ERROR; rx_op->debug_status = *((uint16_t *)(&resp_msg->comn_resp.comn_error)); } else { -- 2.7.4
[dpdk-dev] [PATCH] compress/qat: add compression on dh895x
This patch enables compression on dh895x HW series and updates supported hardware accelerator devices list. Signed-off-by: Tomasz Jozwiak --- doc/guides/compressdevs/qat_comp.rst | 1 + drivers/compress/qat/qat_comp.c | 2 +- drivers/compress/qat/qat_comp_pmd.c | 4 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/doc/guides/compressdevs/qat_comp.rst b/doc/guides/compressdevs/qat_comp.rst index aee3b99..1c24ebb 100644 --- a/doc/guides/compressdevs/qat_comp.rst +++ b/doc/guides/compressdevs/qat_comp.rst @@ -9,6 +9,7 @@ support for the following hardware accelerator devices: * ``Intel QuickAssist Technology C62x`` * ``Intel QuickAssist Technology C3xxx`` +* ``Intel QuickAssist Technology dh895x`` Features diff --git a/drivers/compress/qat/qat_comp.c b/drivers/compress/qat/qat_comp.c index bb00610..e745d13 100644 --- a/drivers/compress/qat/qat_comp.c +++ b/drivers/compress/qat/qat_comp.c @@ -270,7 +270,7 @@ static int qat_comp_create_templates(struct qat_comp_xform *qat_xform, ICP_QAT_FW_COMP_NOT_AUTO_SELECT_BEST, ICP_QAT_FW_COMP_NOT_ENH_AUTO_SELECT_BEST, ICP_QAT_FW_COMP_NOT_DISABLE_TYPE0_ENH_AUTO_SELECT_BEST, - ICP_QAT_FW_COMP_DISABLE_SECURE_RAM_USED_AS_INTMD_BUF); + ICP_QAT_FW_COMP_ENABLE_SECURE_RAM_USED_AS_INTMD_BUF); comp_req->cd_pars.sl.comp_slice_cfg_word[0] = ICP_QAT_HW_COMPRESSION_CONFIG_BUILD( diff --git a/drivers/compress/qat/qat_comp_pmd.c b/drivers/compress/qat/qat_comp_pmd.c index ea93077..27c8856 100644 --- a/drivers/compress/qat/qat_comp_pmd.c +++ b/drivers/compress/qat/qat_comp_pmd.c @@ -484,10 +484,6 @@ static const struct rte_driver compdev_qat_driver = { int qat_comp_dev_create(struct qat_pci_device *qat_pci_dev) { - if (qat_pci_dev->qat_dev_gen == QAT_GEN1) { - QAT_LOG(ERR, "Compression PMD not supported on QAT dh895xcc"); - return 0; - } if (qat_pci_dev->qat_dev_gen == QAT_GEN3) { QAT_LOG(ERR, "Compression PMD not supported on QAT c4xxx"); return 0; -- 2.7.4
Re: [dpdk-dev] [PATCH v3 0/4] Allow using external memory without malloc
On 20-Dec-18 5:18 PM, Thomas Monjalon wrote: 20/12/2018 17:16, Stephen Hemminger: On Thu, 20 Dec 2018 15:32:37 + Anatoly Burakov wrote: Currently, the only way to use externally allocated memory is through rte_malloc API's. While this is fine for a lot of use cases, it may not be suitable for certain other use cases like manual memory management, etc. This patchset adds another API to register memory segments with DPDK (so that API's like ``rte_mem_virt2memseg`` could be relied on by PMD's and such), but not create a malloc heap out of them. Aside from the obvious (not adding memory to a heap), the other major difference between this API and the ``rte_malloc_heap_*`` external memory functions is the fact that no DMA mapping is performed automatically, as well as no mem event callbacks are triggered. This really draws a line in the sand, and there are now two ways of doing things - do everything automatically (using the ``rte_malloc_heap_*`` API's), or do everything manually (``rte_extmem_*`` and future DMA mapping API [1] that would replace ``rte_vfio_dma_map``). This way, the consistency of API is kept, and flexibility is also allowed. [1] https://mails.dpdk.org/archives/dev/2018-November/118175.html Where are the examples for this? Give a sample application maybe. Also there are no test cases. It looks to be a big task, but yes, would be nice to have test of external memory allocation in DPDK unit tests. I imagine if i submitted patches for this, since it's test code, it can go into rc1? Or is that considered a "feature"? I don't think it will be a lot of code, there are only 4 new API calls. Extending extmem autotest should do the trick. Adding a new testpmd mode is also possible but less trivial, and can be postponed to 19.05. -- Thanks, Anatoly
Re: [dpdk-dev] [PATCH] eal: fix core number validation
On 21-Dec-18 8:27 AM, David Marchand wrote: On Thu, Dec 20, 2018 at 11:01 AM Hari Kumar Vemula < hari.kumarx.vem...@intel.com> wrote: When incorrect core value or range provided, as part of -l command line option, a crash occurs. Added valid range checks to fix the crash. Fixes: d888cb8b9613 ("eal: add core list input format") Cc: sta...@dpdk.org Signed-off-by: Hari Kumar Vemula Thanks for reporting. I agree that some validation steps are missing, but I tried a little bit and did not reproduce a crash. On my 8 cores system: [dmarchan@dmarchan dpdk]$ ./master/app/testpmd --no-huge -l 567890,567891,567892 -m 512 --log-level *:debug -- -i --total-num-mbufs 2048 [...] EAL: Support maximum 128 logical core(s) by configuration. EAL: Detected 8 lcore(s) [...] EAL: invalid core list Usage: ./master/app/testpmd [options] etc... [dmarchan@dmarchan dpdk]$ ./master/app/testpmd --no-huge -l 2,3,-1 -m 512 --log-level *:debug -- -i --total-num-mbufs 2048 [...] EAL: Support maximum 128 logical core(s) by configuration. EAL: Detected 8 lcore(s) EAL: Detected 1 NUMA nodes [...] Done testpmd> Bye... Idem with [dmarchan@dmarchan dpdk]$ ./master/app/testpmd --no-huge -l 2,3,567890 -m 512 --log-level *:debug -- -i --total-num-mbufs 2048 [...] EAL: Support maximum 128 logical core(s) by configuration. EAL: Detected 8 lcore(s) EAL: Detected 1 NUMA nodes [...] Done testpmd> Bye... Since you have identified a potential crash, can you give an example of such a crash ? Besides, we have tests that check arguments, so an update of the test would be nice. Thanks. I believe these lcore numbers are used to index the lcore list later, which would cause out-of-bounds access, which may or may not cause a crash, depending on how lucky you get. -- Thanks, Anatoly
Re: [dpdk-dev] [PATCH v3 0/3] net/virtio: Rx paths improvements
On 12/20/18 6:27 PM, Maxime Coquelin wrote: First version of this series did merge out-of-order mergeable and non-mergeable receive paths, but Intel STV team highlighted some performance regression when using multiqueue with two cores enqueueing descs on host, while a single core dequeues the two queues. I didn't manage to close the performance gap, so I decided to give-up this refactoring. But while trying to optimize, I reworked the meargeable function so that it looks like the in-order one. I.e. descriptors are now dequeued in batches, so are descriptors refilled. Doing that, I measure a perfromance gain of 6% when doing rxonly microbenchmark with two cores on host, one in guest. Changes since v2: - Rebase after Jens' packed ring series - Break refill loop if VQ_RING_DESC_CHAIN_END (Tiwei) - Fixup commit message Maxime Coquelin (3): net/virtio: inline refill and offload helpers net/virtio: add non-mergeable support to in-order path net/virtio: improve batching in mergeable path drivers/net/virtio/virtio_ethdev.c | 14 +- drivers/net/virtio/virtio_ethdev.h | 2 +- drivers/net/virtio/virtio_rxtx.c | 257 - 3 files changed, 145 insertions(+), 128 deletions(-) Applied to dpdk-next-virtio Maxime
Re: [dpdk-dev] [PATCH v4] vhost: batch used descs chains write-back with packed ring
On 12/20/18 5:47 PM, Maxime Coquelin wrote: Instead of writing back descriptors chains in order, let's write the first chain flags last in order to improve batching. Also, move the write barrier in logging cache sync, so that it is done only when logging is enabled. It means there is now one more barrier for split ring when logging is enabled. With Kernel's pktgen benchmark, ~3% performance gain is measured. Signed-off-by: Maxime Coquelin --- lib/librte_vhost/vhost.h | 2 ++ lib/librte_vhost/virtio_net.c | 19 --- 2 files changed, 18 insertions(+), 3 deletions(-) Applied to dpdk-next-virtio Maxime
Re: [dpdk-dev] [PATCH] eal: fix core number validation
On Fri, Dec 21, 2018 at 10:19 AM Burakov, Anatoly wrote: > On 21-Dec-18 8:27 AM, David Marchand wrote: > > Since you have identified a potential crash, can you give an example of > > such a crash ? > > Besides, we have tests that check arguments, so an update of the test > would > > be nice. > > I believe these lcore numbers are used to index the lcore list later, > which would cause out-of-bounds access, which may or may not cause a > crash, depending on how lucky you get. > Sure, that and the fact that my testpmd was doing nothing with them :-). Anyway, the unit tests missed this case, so we need an update. And the more I look at those string parsing, the more I think that the service cores parsing has the same issue (copy/paste yay ;-)). -- David Marchand
[dpdk-dev] 18.08.1 patches review and test
Hi all, Here is a list of patches targeted for stable release 18.08.1. Please help review and test. The planned date for the final release is 16th January. Before that, please shout if anyone has objections with these patches being applied. Also for the companies committed to running regression tests, please run the tests and report any issue before the release date. A release candidate tarball can be found at: https://dpdk.org/browse/dpdk-stable/tag/?id=v18.08.1-rc2 These patches are located at branch 18.08 of dpdk-stable repo: https://dpdk.org/browse/dpdk-stable/ Thanks. Kevin Traynor --- Adrien Mazarguil (1): net/mlx5: fix artificial L4 limitation on switch flow rules Agalya Babu RadhaKrishnan (4): vfio: disable in FreeBSD build with meson net/nfp: disable in FreeBSD build with meson net/avp: disable in FreeBSD build with meson net/softnic: disable in FreeBSD build with meson Ajit Khaparde (4): net/bnxt: fix MTU setting net/bnxt: set MAC filtering as outer for non tunnel frames net/bnxt: set a VNIC as default only once net/bnxt: remove excess log messages Akash Saxena (1): crypto/openssl: fix RSA verify operation Alejandro Lucero (6): ethdev: fix MAC changes when live change not supported net/nfp: fix live MAC changes not supported net/nfp: fix mbuf flags with checksum good ethdev: fix error handling in create function net/nfp: fix RSS bus/pci: compare kernel driver instead of interrupt handler Ali Alnubani (4): net/mlx4: fix minor typo net/mlx5: fix minor typos net/mlx4: fix initialization of struct members net/mlx5: fix initialization of struct members Anatoly Burakov (10): fbarray: fix detach in --no-shconf mode eal: do not allow legacy mode with --in-memory mode mem: fix undefined behavior in NUMA-aware mapping mem: improve segment list preallocation mem: fix resource leak ipc: remove panic in async request malloc: fix adjacency check to also include segment list usertools: check for lspci dependency ipc: fix access after async request failure mem: fix division by zero in no-NUMA mode Andrew Rybchenko (2): net/sfc/base: fix build because of no declaration net/sfc: receive prepared packets even in Rx exception case Andy Moreton (4): net/sfc/base: properly align on line continuation net/sfc/base: add space after sizeof net/sfc/base: fix ID retrieval in v3 licensing net/sfc/base: fix MAC Tx stats for less or equal to 64 bytes Anoob Joseph (5): examples/ipsec-secgw: fix wrong session size app/test-crypto-perf: fix check for auth key app/test-crypto-perf: fix check for cipher IV app/test-crypto-perf: fix double allocation of memory net/octeontx: fix failures when available ports > queues Asaf Penso (1): net/mlx5: fix function documentation Bei Sun (1): net/bnxt: set VLAN strip mode before default VNIC cfg Beilei Xing (5): net/e1000: fix queue number in RSS configuration net/avf: remove keeping CRC configuration net/i40e: update Tx offload mask net/i40e: fix Rx instability with vector mode net/i40e: fix X710 Rx after reading some registers Brian Archbold (1): app/testpmd: fix duplicate exit Bruce Richardson (3): compat: fix symbol version support with meson net/avf: fix unused variables and label net/avf: fix missing compiler error flags Chaitanya Babu Talluri (1): efd: fix write unlock during ring creation Chas Williams (2): net/bonding: fix Rx slave fairness net/virtio: do not re-enter clean up routines Cristian Dumitrescu (1): examples/ip_pipeline: fix port and table stats read Damjan Marion (1): net/i40e: fix 25G AOC and ACC cable detection on XXV710 Darek Stojaczyk (4): malloc: check size hint when reserving the biggest element vfio: fix read of freed memory on getting container fd vfio: share default container in multi-process vfio: do not needlessly setup device in secondary process Dariusz Stojaczyk (2): ipc: fix undefined behavior in no-shconf mode vfio: check if group fd is already open David Hunt (1): examples/vm_power: respect maximum CPUs David Marchand (1): devtools: fix symbol check when adding experimental section Dekel Peled (3): ethdev: fix missing names in Tx offload name array net/mlx5: fix packet type for MPLS in UDP net/mlx5: fix validation of Rx queue number Dharmik Thakkar (1): test/hash: fix build Didier Pallard (7): net: fix Intel prepare function for IP checksum offload net/e1000: fix missing Tx multi-segs capability net/fm10k: fix missing Tx multi-segs capability net/i40e: fix missing Tx multi-segs capability net/ixgbe: fix missing Tx multi-segs capability drivers/net: fix several Tx prepare fu
[dpdk-dev] [PATCH 1/4] test/extmem: refactor and rename test functions
We will be adding a new extmem test that will behave roughly similar to already existing, so clarify function names to distinguish between these tests, as well as factor out the common parts. Signed-off-by: Anatoly Burakov --- test/test/test_external_mem.c | 62 +-- 1 file changed, 37 insertions(+), 25 deletions(-) diff --git a/test/test/test_external_mem.c b/test/test/test_external_mem.c index 998cafa68..6df42e3ae 100644 --- a/test/test/test_external_mem.c +++ b/test/test/test_external_mem.c @@ -23,7 +23,35 @@ #define EXTERNAL_MEM_SZ (RTE_PGSIZE_4K << 10) /* 4M of data */ static int -test_invalid_param(void *addr, size_t len, size_t pgsz, rte_iova_t *iova, +check_mem(void *addr, rte_iova_t *iova, size_t pgsz, int n_pages) +{ + int i; + + /* check that we can get this memory from EAL now */ + for (i = 0; i < n_pages; i++) { + const struct rte_memseg *ms; + void *cur = RTE_PTR_ADD(addr, pgsz * i); + + ms = rte_mem_virt2memseg(cur, NULL); + if (ms == NULL) { + printf("%s():%i: Failed to retrieve memseg for external mem\n", + __func__, __LINE__); + return -1; + } + if (ms->addr != cur) { + printf("%s():%i: VA mismatch\n", __func__, __LINE__); + return -1; + } + if (ms->iova != iova[i]) { + printf("%s():%i: IOVA mismatch\n", __func__, __LINE__); + return -1; + } + } + return 0; +} + +static int +test_malloc_invalid_param(void *addr, size_t len, size_t pgsz, rte_iova_t *iova, int n_pages) { static const char * const names[] = { @@ -223,11 +251,12 @@ test_invalid_param(void *addr, size_t len, size_t pgsz, rte_iova_t *iova, } static int -test_basic(void *addr, size_t len, size_t pgsz, rte_iova_t *iova, int n_pages) +test_malloc_basic(void *addr, size_t len, size_t pgsz, rte_iova_t *iova, + int n_pages) { const char *heap_name = "heap"; void *ptr = NULL; - int socket_id, i; + int socket_id; const struct rte_memzone *mz = NULL; /* create heap */ @@ -261,26 +290,9 @@ test_basic(void *addr, size_t len, size_t pgsz, rte_iova_t *iova, int n_pages) goto fail; } - /* check that we can get this memory from EAL now */ - for (i = 0; i < n_pages; i++) { - const struct rte_memseg *ms; - void *cur = RTE_PTR_ADD(addr, pgsz * i); - - ms = rte_mem_virt2memseg(cur, NULL); - if (ms == NULL) { - printf("%s():%i: Failed to retrieve memseg for external mem\n", - __func__, __LINE__); - goto fail; - } - if (ms->addr != cur) { - printf("%s():%i: VA mismatch\n", __func__, __LINE__); - goto fail; - } - if (ms->iova != iova[i]) { - printf("%s():%i: IOVA mismatch\n", __func__, __LINE__); - goto fail; - } - } + /* check if memory is accessible from EAL */ + if (check_mem(addr, iova, pgsz, n_pages) < 0) + goto fail; /* allocate - this now should succeed */ ptr = rte_malloc_socket("EXTMEM", 64, 0, socket_id); @@ -379,8 +391,8 @@ test_external_mem(void) iova[i] = tmp; } - ret = test_invalid_param(addr, len, pgsz, iova, n_pages); - ret |= test_basic(addr, len, pgsz, iova, n_pages); + ret = test_malloc_invalid_param(addr, len, pgsz, iova, n_pages); + ret |= test_malloc_basic(addr, len, pgsz, iova, n_pages); munmap(addr, len); -- 2.17.1
[dpdk-dev] [PATCH 2/4] test/extmem: extend autotest to test without IOVA table
Currently, only scenario with valid IOVA table is tested. Fix this by also testing without IOVA table - in these cases, EAL should always return RTE_BAD_IOVA for all memsegs. Signed-off-by: Anatoly Burakov --- test/test/test_external_mem.c | 68 +++ 1 file changed, 45 insertions(+), 23 deletions(-) diff --git a/test/test/test_external_mem.c b/test/test/test_external_mem.c index 6df42e3ae..845c625d6 100644 --- a/test/test/test_external_mem.c +++ b/test/test/test_external_mem.c @@ -42,7 +42,11 @@ check_mem(void *addr, rte_iova_t *iova, size_t pgsz, int n_pages) printf("%s():%i: VA mismatch\n", __func__, __LINE__); return -1; } - if (ms->iova != iova[i]) { + if (iova == NULL && ms->iova != RTE_BAD_IOVA) { + printf("%s():%i: IOVA is not invalid\n", __func__, + __LINE__); + return -1; + } else if (iova != NULL && ms->iova != iova[i]) { printf("%s():%i: IOVA mismatch\n", __func__, __LINE__); return -1; } @@ -218,24 +222,29 @@ test_malloc_invalid_param(void *addr, size_t len, size_t pgsz, rte_iova_t *iova, goto fail; } - /* wrong page count */ - if (rte_malloc_heap_memory_add(valid_name, addr, len, - iova, 0, pgsz) >= 0 || rte_errno != EINVAL) { - printf("%s():%i: Added memory with invalid parameters\n", - __func__, __LINE__); - goto fail; - } - if (rte_malloc_heap_memory_add(valid_name, addr, len, - iova, n_pages - 1, pgsz) >= 0 || rte_errno != EINVAL) { - printf("%s():%i: Added memory with invalid parameters\n", - __func__, __LINE__); - goto fail; - } - if (rte_malloc_heap_memory_add(valid_name, addr, len, - iova, n_pages + 1, pgsz) >= 0 || rte_errno != EINVAL) { - printf("%s():%i: Added memory with invalid parameters\n", - __func__, __LINE__); - goto fail; + /* the following tests are only valid if IOVA table is not NULL */ + if (iova != NULL) { + /* wrong page count */ + if (rte_malloc_heap_memory_add(valid_name, addr, len, + iova, 0, pgsz) >= 0 || rte_errno != EINVAL) { + printf("%s():%i: Added memory with invalid parameters\n", + __func__, __LINE__); + goto fail; + } + if (rte_malloc_heap_memory_add(valid_name, addr, len, + iova, n_pages - 1, pgsz) >= 0 || + rte_errno != EINVAL) { + printf("%s():%i: Added memory with invalid parameters\n", + __func__, __LINE__); + goto fail; + } + if (rte_malloc_heap_memory_add(valid_name, addr, len, + iova, n_pages + 1, pgsz) >= 0 || + rte_errno != EINVAL) { + printf("%s():%i: Added memory with invalid parameters\n", + __func__, __LINE__); + goto fail; + } } /* tests passed, destroy heap */ @@ -257,7 +266,7 @@ test_malloc_basic(void *addr, size_t len, size_t pgsz, rte_iova_t *iova, const char *heap_name = "heap"; void *ptr = NULL; int socket_id; - const struct rte_memzone *mz = NULL; + const struct rte_memzone *mz = NULL, *contig_mz = NULL; /* create heap */ if (rte_malloc_heap_create(heap_name) != 0) { @@ -322,12 +331,19 @@ test_malloc_basic(void *addr, size_t len, size_t pgsz, rte_iova_t *iova, goto fail; } + /* try allocating a memzone */ + mz = rte_memzone_reserve("heap_test", pgsz * 2, socket_id, 0); + if (mz == NULL) { + printf("%s():%i: Failed to reserve memzone\n", + __func__, __LINE__); + goto fail; + } /* try allocating an IOVA-contiguous memzone - this should succeed -* because we've set up a contiguous IOVA table. +* if we've set up a contiguous IOVA table, and fail if we haven't. */ - mz = rte_memzone_reserve("heap_test", pgsz * 2, socket_id, + contig_mz = rte_memzone_reserve("heap_test_contig", pgsz * 2, socket_id, RTE_MEMZONE_IOVA_CONTIG); - if (mz == NULL) { + if (iova != NULL && contig_mz == NULL) { printf("%s():%i: Failed to reserve memzone\n", __func__, __LINE__); goto fail; @@ -342,6 +358,8 @@ test_malloc_basic(void
[dpdk-dev] [PATCH 3/4] test/extmem: check if memseg list is external
Extend the extmem autotest to check whether the memseg lists for externally allocated memory are always marked as external. Signed-off-by: Anatoly Burakov --- test/test/test_external_mem.c | 11 ++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/test/test/test_external_mem.c b/test/test/test_external_mem.c index 845c625d6..5557f6e3f 100644 --- a/test/test/test_external_mem.c +++ b/test/test/test_external_mem.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -29,10 +30,18 @@ check_mem(void *addr, rte_iova_t *iova, size_t pgsz, int n_pages) /* check that we can get this memory from EAL now */ for (i = 0; i < n_pages; i++) { + const struct rte_memseg_list *msl; const struct rte_memseg *ms; void *cur = RTE_PTR_ADD(addr, pgsz * i); - ms = rte_mem_virt2memseg(cur, NULL); + msl = rte_mem_virt2memseg_list(cur); + if (!msl->external) { + printf("%s():%i: Memseg list is not marked as external\n", + __func__, __LINE__); + return -1; + } + + ms = rte_mem_virt2memseg(cur, msl); if (ms == NULL) { printf("%s():%i: Failed to retrieve memseg for external mem\n", __func__, __LINE__); -- 2.17.1
[dpdk-dev] [PATCH 4/4] test/extmem: extend test to cover non-heap API
Currently, extmem autotest only covers the external malloc heap API. Extend it to also cover the non-heap, register/unregister external memory API. Signed-off-by: Anatoly Burakov --- test/test/test_external_mem.c | 146 ++ 1 file changed, 146 insertions(+) diff --git a/test/test/test_external_mem.c b/test/test/test_external_mem.c index 5557f6e3f..e1abce104 100644 --- a/test/test/test_external_mem.c +++ b/test/test/test_external_mem.c @@ -393,6 +393,144 @@ test_malloc_basic(void *addr, size_t len, size_t pgsz, rte_iova_t *iova, return -1; } +static int +test_extmem_invalid_param(void *addr, size_t len, size_t pgsz, rte_iova_t *iova, + int n_pages) +{ + /* these calls may fail for other reasons, so check errno */ + if (rte_extmem_unregister(addr, len) >= 0 || + rte_errno != ENOENT) { + printf("%s():%i: Unregistered non-existent memory\n", + __func__, __LINE__); + return -1; + } + + if (rte_extmem_attach(addr, len) >= 0 || + rte_errno != ENOENT) { + printf("%s():%i: Attached to non-existent memory\n", + __func__, __LINE__); + return -1; + } + if (rte_extmem_attach(addr, len) >= 0 || + rte_errno != ENOENT) { + printf("%s():%i: Detached from non-existent memory\n", + __func__, __LINE__); + return -1; + } + + /* zero length */ + if (rte_extmem_register(addr, 0, NULL, 0, pgsz) >= 0 || + rte_errno != EINVAL) { + printf("%s():%i: Registered memory with invalid parameters\n", + __func__, __LINE__); + return -1; + } + + if (rte_extmem_unregister(addr, 0) >= 0 || + rte_errno != EINVAL) { + printf("%s():%i: Unregistered memory with invalid parameters\n", + __func__, __LINE__); + return -1; + } + + if (rte_extmem_attach(addr, 0) >= 0 || + rte_errno != EINVAL) { + printf("%s():%i: Attached memory with invalid parameters\n", + __func__, __LINE__); + return -1; + } + if (rte_extmem_attach(addr, 0) >= 0 || + rte_errno != EINVAL) { + printf("%s():%i: Detached memory with invalid parameters\n", + __func__, __LINE__); + return -1; + } + + /* zero address */ + if (rte_extmem_register(NULL, len, NULL, 0, pgsz) >= 0 || + rte_errno != EINVAL) { + printf("%s():%i: Registered memory with invalid parameters\n", + __func__, __LINE__); + return -1; + } + + if (rte_extmem_unregister(NULL, len) >= 0 || + rte_errno != EINVAL) { + printf("%s():%i: Unregistered memory with invalid parameters\n", + __func__, __LINE__); + return -1; + } + + if (rte_extmem_attach(NULL, len) >= 0 || + rte_errno != EINVAL) { + printf("%s():%i: Attached memory with invalid parameters\n", + __func__, __LINE__); + return -1; + } + if (rte_extmem_attach(NULL, len) >= 0 || + rte_errno != EINVAL) { + printf("%s():%i: Detached memory with invalid parameters\n", + __func__, __LINE__); + return -1; + } + + /* the following tests are only valid if IOVA table is not NULL */ + if (iova != NULL) { + /* wrong page count */ + if (rte_extmem_register(addr, len, + iova, 0, pgsz) >= 0 || rte_errno != EINVAL) { + printf("%s():%i: Registered memory with invalid parameters\n", + __func__, __LINE__); + return -1; + } + if (rte_extmem_register(addr, len, + iova, n_pages - 1, pgsz) >= 0 || + rte_errno != EINVAL) { + printf("%s():%i: Registered memory with invalid parameters\n", + __func__, __LINE__); + return -1; + } + if (rte_extmem_register(addr, len, + iova, n_pages + 1, pgsz) >= 0 || + rte_errno != EINVAL) { + printf("%s():%i: Registered memory with invalid parameters\n", + __func__, __LINE__); + return -1; + } + } + + return 0; +} + +static int +test_extmem_basic(void *addr, size_t len, size_t pgsz, rte_iova
[dpdk-dev] [PATCH v2 2/4] test/extmem: extend autotest to test without IOVA table
Currently, only scenario with valid IOVA table is tested. Fix this by also testing without IOVA table - in these cases, EAL should always return RTE_BAD_IOVA for all memsegs, and contiguous memzone allocation should fail. Signed-off-by: Anatoly Burakov --- Notes: v2: - Improve condition checking on contiguous memzone test - Clarify commit message test/test/test_external_mem.c | 66 +++ 1 file changed, 43 insertions(+), 23 deletions(-) diff --git a/test/test/test_external_mem.c b/test/test/test_external_mem.c index 6df42e3ae..06e6ccc1d 100644 --- a/test/test/test_external_mem.c +++ b/test/test/test_external_mem.c @@ -31,6 +31,7 @@ check_mem(void *addr, rte_iova_t *iova, size_t pgsz, int n_pages) for (i = 0; i < n_pages; i++) { const struct rte_memseg *ms; void *cur = RTE_PTR_ADD(addr, pgsz * i); + rte_iova_t expected_iova; ms = rte_mem_virt2memseg(cur, NULL); if (ms == NULL) { @@ -42,7 +43,8 @@ check_mem(void *addr, rte_iova_t *iova, size_t pgsz, int n_pages) printf("%s():%i: VA mismatch\n", __func__, __LINE__); return -1; } - if (ms->iova != iova[i]) { + expected_iova = (iova == NULL) ? RTE_BAD_IOVA : iova[i]; + if (ms->iova != expected_iova) { printf("%s():%i: IOVA mismatch\n", __func__, __LINE__); return -1; } @@ -218,24 +220,29 @@ test_malloc_invalid_param(void *addr, size_t len, size_t pgsz, rte_iova_t *iova, goto fail; } - /* wrong page count */ - if (rte_malloc_heap_memory_add(valid_name, addr, len, - iova, 0, pgsz) >= 0 || rte_errno != EINVAL) { - printf("%s():%i: Added memory with invalid parameters\n", - __func__, __LINE__); - goto fail; - } - if (rte_malloc_heap_memory_add(valid_name, addr, len, - iova, n_pages - 1, pgsz) >= 0 || rte_errno != EINVAL) { - printf("%s():%i: Added memory with invalid parameters\n", - __func__, __LINE__); - goto fail; - } - if (rte_malloc_heap_memory_add(valid_name, addr, len, - iova, n_pages + 1, pgsz) >= 0 || rte_errno != EINVAL) { - printf("%s():%i: Added memory with invalid parameters\n", - __func__, __LINE__); - goto fail; + /* the following tests are only valid if IOVA table is not NULL */ + if (iova != NULL) { + /* wrong page count */ + if (rte_malloc_heap_memory_add(valid_name, addr, len, + iova, 0, pgsz) >= 0 || rte_errno != EINVAL) { + printf("%s():%i: Added memory with invalid parameters\n", + __func__, __LINE__); + goto fail; + } + if (rte_malloc_heap_memory_add(valid_name, addr, len, + iova, n_pages - 1, pgsz) >= 0 || + rte_errno != EINVAL) { + printf("%s():%i: Added memory with invalid parameters\n", + __func__, __LINE__); + goto fail; + } + if (rte_malloc_heap_memory_add(valid_name, addr, len, + iova, n_pages + 1, pgsz) >= 0 || + rte_errno != EINVAL) { + printf("%s():%i: Added memory with invalid parameters\n", + __func__, __LINE__); + goto fail; + } } /* tests passed, destroy heap */ @@ -257,7 +264,7 @@ test_malloc_basic(void *addr, size_t len, size_t pgsz, rte_iova_t *iova, const char *heap_name = "heap"; void *ptr = NULL; int socket_id; - const struct rte_memzone *mz = NULL; + const struct rte_memzone *mz = NULL, *contig_mz = NULL; /* create heap */ if (rte_malloc_heap_create(heap_name) != 0) { @@ -322,12 +329,19 @@ test_malloc_basic(void *addr, size_t len, size_t pgsz, rte_iova_t *iova, goto fail; } + /* try allocating a memzone */ + mz = rte_memzone_reserve("heap_test", pgsz * 2, socket_id, 0); + if (mz == NULL) { + printf("%s():%i: Failed to reserve memzone\n", + __func__, __LINE__); + goto fail; + } /* try allocating an IOVA-contiguous memzone - this should succeed -* because we've set up a contiguous IOVA table. +* if we've set up a contiguous IOVA table, and fail if we haven't. */ - mz = rte_memzone_reserve("heap_test", pgsz * 2, socket_id, + contig_mz = rte_memzone_reserve("h
[dpdk-dev] [PATCH v2 1/4] test/extmem: refactor and rename test functions
We will be adding a new extmem test that will behave roughly similar to already existing, so clarify function names to distinguish between these tests, as well as factor out the common parts. Signed-off-by: Anatoly Burakov --- test/test/test_external_mem.c | 62 +-- 1 file changed, 37 insertions(+), 25 deletions(-) diff --git a/test/test/test_external_mem.c b/test/test/test_external_mem.c index 998cafa68..6df42e3ae 100644 --- a/test/test/test_external_mem.c +++ b/test/test/test_external_mem.c @@ -23,7 +23,35 @@ #define EXTERNAL_MEM_SZ (RTE_PGSIZE_4K << 10) /* 4M of data */ static int -test_invalid_param(void *addr, size_t len, size_t pgsz, rte_iova_t *iova, +check_mem(void *addr, rte_iova_t *iova, size_t pgsz, int n_pages) +{ + int i; + + /* check that we can get this memory from EAL now */ + for (i = 0; i < n_pages; i++) { + const struct rte_memseg *ms; + void *cur = RTE_PTR_ADD(addr, pgsz * i); + + ms = rte_mem_virt2memseg(cur, NULL); + if (ms == NULL) { + printf("%s():%i: Failed to retrieve memseg for external mem\n", + __func__, __LINE__); + return -1; + } + if (ms->addr != cur) { + printf("%s():%i: VA mismatch\n", __func__, __LINE__); + return -1; + } + if (ms->iova != iova[i]) { + printf("%s():%i: IOVA mismatch\n", __func__, __LINE__); + return -1; + } + } + return 0; +} + +static int +test_malloc_invalid_param(void *addr, size_t len, size_t pgsz, rte_iova_t *iova, int n_pages) { static const char * const names[] = { @@ -223,11 +251,12 @@ test_invalid_param(void *addr, size_t len, size_t pgsz, rte_iova_t *iova, } static int -test_basic(void *addr, size_t len, size_t pgsz, rte_iova_t *iova, int n_pages) +test_malloc_basic(void *addr, size_t len, size_t pgsz, rte_iova_t *iova, + int n_pages) { const char *heap_name = "heap"; void *ptr = NULL; - int socket_id, i; + int socket_id; const struct rte_memzone *mz = NULL; /* create heap */ @@ -261,26 +290,9 @@ test_basic(void *addr, size_t len, size_t pgsz, rte_iova_t *iova, int n_pages) goto fail; } - /* check that we can get this memory from EAL now */ - for (i = 0; i < n_pages; i++) { - const struct rte_memseg *ms; - void *cur = RTE_PTR_ADD(addr, pgsz * i); - - ms = rte_mem_virt2memseg(cur, NULL); - if (ms == NULL) { - printf("%s():%i: Failed to retrieve memseg for external mem\n", - __func__, __LINE__); - goto fail; - } - if (ms->addr != cur) { - printf("%s():%i: VA mismatch\n", __func__, __LINE__); - goto fail; - } - if (ms->iova != iova[i]) { - printf("%s():%i: IOVA mismatch\n", __func__, __LINE__); - goto fail; - } - } + /* check if memory is accessible from EAL */ + if (check_mem(addr, iova, pgsz, n_pages) < 0) + goto fail; /* allocate - this now should succeed */ ptr = rte_malloc_socket("EXTMEM", 64, 0, socket_id); @@ -379,8 +391,8 @@ test_external_mem(void) iova[i] = tmp; } - ret = test_invalid_param(addr, len, pgsz, iova, n_pages); - ret |= test_basic(addr, len, pgsz, iova, n_pages); + ret = test_malloc_invalid_param(addr, len, pgsz, iova, n_pages); + ret |= test_malloc_basic(addr, len, pgsz, iova, n_pages); munmap(addr, len); -- 2.17.1
[dpdk-dev] [PATCH v2 4/4] test/extmem: extend test to cover non-heap API
Currently, extmem autotest only covers the external malloc heap API. Extend it to also cover the non-heap, register/unregister external memory API. Signed-off-by: Anatoly Burakov --- test/test/test_external_mem.c | 146 ++ 1 file changed, 146 insertions(+) diff --git a/test/test/test_external_mem.c b/test/test/test_external_mem.c index b877f8e2e..97bde1cfb 100644 --- a/test/test/test_external_mem.c +++ b/test/test/test_external_mem.c @@ -391,6 +391,144 @@ test_malloc_basic(void *addr, size_t len, size_t pgsz, rte_iova_t *iova, return -1; } +static int +test_extmem_invalid_param(void *addr, size_t len, size_t pgsz, rte_iova_t *iova, + int n_pages) +{ + /* these calls may fail for other reasons, so check errno */ + if (rte_extmem_unregister(addr, len) >= 0 || + rte_errno != ENOENT) { + printf("%s():%i: Unregistered non-existent memory\n", + __func__, __LINE__); + return -1; + } + + if (rte_extmem_attach(addr, len) >= 0 || + rte_errno != ENOENT) { + printf("%s():%i: Attached to non-existent memory\n", + __func__, __LINE__); + return -1; + } + if (rte_extmem_attach(addr, len) >= 0 || + rte_errno != ENOENT) { + printf("%s():%i: Detached from non-existent memory\n", + __func__, __LINE__); + return -1; + } + + /* zero length */ + if (rte_extmem_register(addr, 0, NULL, 0, pgsz) >= 0 || + rte_errno != EINVAL) { + printf("%s():%i: Registered memory with invalid parameters\n", + __func__, __LINE__); + return -1; + } + + if (rte_extmem_unregister(addr, 0) >= 0 || + rte_errno != EINVAL) { + printf("%s():%i: Unregistered memory with invalid parameters\n", + __func__, __LINE__); + return -1; + } + + if (rte_extmem_attach(addr, 0) >= 0 || + rte_errno != EINVAL) { + printf("%s():%i: Attached memory with invalid parameters\n", + __func__, __LINE__); + return -1; + } + if (rte_extmem_attach(addr, 0) >= 0 || + rte_errno != EINVAL) { + printf("%s():%i: Detached memory with invalid parameters\n", + __func__, __LINE__); + return -1; + } + + /* zero address */ + if (rte_extmem_register(NULL, len, NULL, 0, pgsz) >= 0 || + rte_errno != EINVAL) { + printf("%s():%i: Registered memory with invalid parameters\n", + __func__, __LINE__); + return -1; + } + + if (rte_extmem_unregister(NULL, len) >= 0 || + rte_errno != EINVAL) { + printf("%s():%i: Unregistered memory with invalid parameters\n", + __func__, __LINE__); + return -1; + } + + if (rte_extmem_attach(NULL, len) >= 0 || + rte_errno != EINVAL) { + printf("%s():%i: Attached memory with invalid parameters\n", + __func__, __LINE__); + return -1; + } + if (rte_extmem_attach(NULL, len) >= 0 || + rte_errno != EINVAL) { + printf("%s():%i: Detached memory with invalid parameters\n", + __func__, __LINE__); + return -1; + } + + /* the following tests are only valid if IOVA table is not NULL */ + if (iova != NULL) { + /* wrong page count */ + if (rte_extmem_register(addr, len, + iova, 0, pgsz) >= 0 || rte_errno != EINVAL) { + printf("%s():%i: Registered memory with invalid parameters\n", + __func__, __LINE__); + return -1; + } + if (rte_extmem_register(addr, len, + iova, n_pages - 1, pgsz) >= 0 || + rte_errno != EINVAL) { + printf("%s():%i: Registered memory with invalid parameters\n", + __func__, __LINE__); + return -1; + } + if (rte_extmem_register(addr, len, + iova, n_pages + 1, pgsz) >= 0 || + rte_errno != EINVAL) { + printf("%s():%i: Registered memory with invalid parameters\n", + __func__, __LINE__); + return -1; + } + } + + return 0; +} + +static int +test_extmem_basic(void *addr, size_t len, size_t pgsz, rte_iova
[dpdk-dev] [PATCH v2 3/4] test/extmem: check if memseg list is external
Extend the extmem autotest to check whether the memseg lists for externally allocated memory are always marked as external. Signed-off-by: Anatoly Burakov --- test/test/test_external_mem.c | 11 ++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/test/test/test_external_mem.c b/test/test/test_external_mem.c index 06e6ccc1d..b877f8e2e 100644 --- a/test/test/test_external_mem.c +++ b/test/test/test_external_mem.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -29,11 +30,19 @@ check_mem(void *addr, rte_iova_t *iova, size_t pgsz, int n_pages) /* check that we can get this memory from EAL now */ for (i = 0; i < n_pages; i++) { + const struct rte_memseg_list *msl; const struct rte_memseg *ms; void *cur = RTE_PTR_ADD(addr, pgsz * i); rte_iova_t expected_iova; - ms = rte_mem_virt2memseg(cur, NULL); + msl = rte_mem_virt2memseg_list(cur); + if (!msl->external) { + printf("%s():%i: Memseg list is not marked as external\n", + __func__, __LINE__); + return -1; + } + + ms = rte_mem_virt2memseg(cur, msl); if (ms == NULL) { printf("%s():%i: Failed to retrieve memseg for external mem\n", __func__, __LINE__); -- 2.17.1
Re: [dpdk-dev] [PATCH] test/crypto: fix misleading trace message
> -Original Message- > From: Trahe, Fiona > Sent: Friday, December 21, 2018 12:01 AM > To: dev@dpdk.org > Cc: De Lara Guarch, Pablo ; Trahe, Fiona > ; Zhang, Roy Fan ; > akhil.go...@nxp.com; sta...@dpdk.org > Subject: [PATCH] test/crypto: fix misleading trace message > > Test was reporting digest verification failed for all operation errors. > Fixed so it only reports this if the PMD actually reports an auth failure. > > Fixes: 9c0eed2f06ae ("app/test: rework crypto AES unit test") > CC: sta...@dpdk.org > > Signed-off-by: Fiona Trahe Acked-by: Pablo de Lara
Re: [dpdk-dev] [PATCH] test/crypto: don't attempt unsupported SGL tests on aesni mb PMD
> -Original Message- > From: Trahe, Fiona > Sent: Friday, December 21, 2018 12:02 AM > To: dev@dpdk.org > Cc: De Lara Guarch, Pablo ; Trahe, Fiona > ; Zhang, Roy Fan ; > akhil.go...@nxp.com > Subject: [PATCH] test/crypto: don't attempt unsupported SGL tests on aesni > mb PMD > > Remove AESNI_MB flag from SGL test cases which it doesn't support. > > Signed-off-by: Fiona Trahe Patch looks good, but I wonder if this is a fix and therefore, if it needs to be backported. Thanks, Pablo
Re: [dpdk-dev] [PATCH v4 04/10] lib: introduce ipsec library
On 12/20/2018 7:36 PM, Ananyev, Konstantin wrote: > >>> diff --git a/lib/librte_ipsec/sa.c b/lib/librte_ipsec/sa.c >>> new file mode 100644 >>> index 0..f927a82bf >>> --- /dev/null >>> +++ b/lib/librte_ipsec/sa.c >>> @@ -0,0 +1,327 @@ >>> +/* SPDX-License-Identifier: BSD-3-Clause >>> + * Copyright(c) 2018 Intel Corporation >>> + */ >>> + >>> +#include >>> +#include >>> +#include >>> +#include >>> + >>> +#include "sa.h" >>> +#include "ipsec_sqn.h" >>> + >>> +/* some helper structures */ >>> +struct crypto_xform { >>> + struct rte_crypto_auth_xform *auth; >>> + struct rte_crypto_cipher_xform *cipher; >>> + struct rte_crypto_aead_xform *aead; >>> +}; >> shouldn't this be union as aead cannot be with cipher and auth cases. > That's used internally to collect/analyze xforms provided by > prm->crypto_xform. > > >> extra line >>> + >>> + >>> +static int >>> +check_crypto_xform(struct crypto_xform *xform) >>> +{ >>> + uintptr_t p; >>> + >>> + p = (uintptr_t)xform->auth | (uintptr_t)xform->cipher; >> what is the intent of this? > It is used below to check that if aead is present both cipher and auth > are not. > >>> + >>> + /* either aead or both auth and cipher should be not NULLs */ >>> + if (xform->aead) { >>> + if (p) >>> + return -EINVAL; >>> + } else if (p == (uintptr_t)xform->auth) { >>> + return -EINVAL; >>> + } >> This function does not look good. It will miss the case of cipher only > Cipher only is not supported right now and I am not aware about plans > to support it in future. > If someone would like to add cipher onl,then yes he/she probably would > have to update this function. I know that cipher_only is not supported and nobody will support it in case of ipsec. My point is if somebody gives only auth or only cipher xform, then this function would not be able to detect that case and will not return error. >>> + >>> + return 0; >>> +} >>> + >>> +static int >>> +fill_crypto_xform(struct crypto_xform *xform, >>> + const struct rte_ipsec_sa_prm *prm) >>> +{ >>> + struct rte_crypto_sym_xform *xf; >>> + >>> + memset(xform, 0, sizeof(*xform)); >>> + >>> + for (xf = prm->crypto_xform; xf != NULL; xf = xf->next) { >>> + if (xf->type == RTE_CRYPTO_SYM_XFORM_AUTH) { >>> + if (xform->auth != NULL) >>> + return -EINVAL; >>> + xform->auth = &xf->auth; >>> + } else if (xf->type == RTE_CRYPTO_SYM_XFORM_CIPHER) { >>> + if (xform->cipher != NULL) >>> + return -EINVAL; >>> + xform->cipher = &xf->cipher; >>> + } else if (xf->type == RTE_CRYPTO_SYM_XFORM_AEAD) { >>> + if (xform->aead != NULL) >>> + return -EINVAL; >>> + xform->aead = &xf->aead; >>> + } else >>> + return -EINVAL; >>> + } >>> + >>> + return check_crypto_xform(xform); >>> +} >> how is this function handling the inbound and outbound cases. >> In inbound first xform is auth and then cipher. >> In outbound first is cipher and then auth. I think this should be >> checked in the lib. > Interesting, I didn't know about such limitation. > My understanding was that the any order (, ) > for both inbound and outbound is acceptable. > Is that order restriction is documented somewhere? /** * Symmetric crypto transform structure. * * This is used to specify the crypto transforms required, multiple transforms * can be chained together to specify a chain transforms such as authentication * then cipher, or cipher then authentication. Each transform structure can * hold a single transform, the type field is used to specify which transform * is contained within the union */ struct rte_crypto_sym_xform { This is not a limitation, this is how it is designed to handle 2 cases of crypto - auth then cipher and cipher then auth. >> Here for loop should not be there, as there would be at max only 2 xforms. >>> + >>> +uint64_t __rte_experimental >>> +rte_ipsec_sa_type(const struct rte_ipsec_sa *sa) >>> +{ >>> + return sa->type; >>> +} >>> + >>> +static int32_t >>> +ipsec_sa_size(uint32_t wsz, uint64_t type, uint32_t *nb_bucket) >>> +{ >>> + uint32_t n, sz; >>> + >>> + n = 0; >>> + if (wsz != 0 && (type & RTE_IPSEC_SATP_DIR_MASK) == >>> + RTE_IPSEC_SATP_DIR_IB) >>> + n = replay_num_bucket(wsz); >>> + >>> + if (n > WINDOW_BUCKET_MAX) >>> + return -EINVAL; >>> + >>> + *nb_bucket = n; >>> + >>> + sz = rsn_size(n); >>> + sz += sizeof(struct rte_ipsec_sa); >>> + return sz; >>> +} >>> + >>> +void __rte_experimental >>> +rte_ipsec_sa_fini(struct rte_ipsec_sa *sa) >>> +{ >>> + memset(sa, 0, sa->size); >>> +} >> Where is the memory of "sa" getting initialized? > Not sure I understand your question... > Do you mean we missed memset(sa, 0, size) > in rte_ipsec_sa_init()? Sorry I did not
Re: [dpdk-dev] [PATCH v4 04/10] lib: introduce ipsec library
On 12/20/2018 11:47 PM, Ananyev, Konstantin wrote: + +static int +fill_crypto_xform(struct crypto_xform *xform, + const struct rte_ipsec_sa_prm *prm) +{ + struct rte_crypto_sym_xform *xf; + + memset(xform, 0, sizeof(*xform)); + + for (xf = prm->crypto_xform; xf != NULL; xf = xf->next) { + if (xf->type == RTE_CRYPTO_SYM_XFORM_AUTH) { + if (xform->auth != NULL) + return -EINVAL; + xform->auth = &xf->auth; + } else if (xf->type == RTE_CRYPTO_SYM_XFORM_CIPHER) { + if (xform->cipher != NULL) + return -EINVAL; + xform->cipher = &xf->cipher; + } else if (xf->type == RTE_CRYPTO_SYM_XFORM_AEAD) { + if (xform->aead != NULL) + return -EINVAL; + xform->aead = &xf->aead; + } else + return -EINVAL; + } + + return check_crypto_xform(xform); +} >>> how is this function handling the inbound and outbound cases. >>> In inbound first xform is auth and then cipher. >>> In outbound first is cipher and then auth. I think this should be >>> checked in the lib. >> Interesting, I didn't know about such limitation. >> My understanding was that the any order (, ) >> for both inbound and outbound is acceptable. >> Is that order restriction is documented somewhere? >> > Actually, if such restriction really exists, and cryptodev framework obeys it, > then crypto session creation will fail anyway. ipsec library should not rely on other components to give error. it should handle the cases which it is expected to. As per my understanding, IPSEC is a cipher then authenticate protocol for outbound case and it should give error in other case. Similarly, auth then cipher in case of inbound case. >>> Here for loop should not be there, as there would be at max only 2 xforms.
Re: [dpdk-dev] [PATCH 18.11] malloc: fix deadlock when using malloc stats
04/12/2018 13:22, Anatoly Burakov: > Currently, malloc statistics and external heap creation code > use memory hotplug lock as a way to synchronize accesses to > heaps (as in, locking the hotplug lock to prevent list of heaps > from changing under our feet). At the same time, malloc > statistics code will also lock the heap because it needs to > access heap data and does not want any other thread to allocate > anything from that heap. > > In such scheme, it is possible to enter a deadlock with the > following sequence of events: > > thread 1 thread 2 > rte_malloc() > rte_malloc_dump_stats() > take heap lock > take hotplug lock > failed to allocate, > attempt to take > hotplug lock > attempt to take heap lock > > Neither thread will be able to continue, as both of them are > waiting for the other one to drop the lock. Adding an > additional lock will require an ABI change, so instead of > that, make malloc statistics calls thread-unsafe with > respect to creating/destroying heaps. > > Fixes: 72cf92b31855 ("malloc: index heaps using heap ID rather than NUMA > node") > Cc: sta...@dpdk.org > > Signed-off-by: Anatoly Burakov > --- > > Notes: > IMO this is the best we can do for 18.11 without breaking ABI. > For 19.02, we can introduce a new global heap lock (something > i should've done in the first place...), so this patch is > not applicable to 19.02. For 19.02, we can fix this properly > by introducing another lock and breaking the EAL ABI. > > Not sure where to put docs update, feedback welcome. This patch is also changing the API, because functions become not thread-safe. I think you should note it in the release notes. About 19.02, do we want to take this patch (with release notes updated)?
Re: [dpdk-dev] net/failsafe: add default Tx mbuf fast free capability
On 10/12/2018 12:36 PM, Andrew Rybchenko wrote: > From: Ivan Malov > > This capability is reported when supported by the current emitting > sub-device. Failsafe PMD itself does not excercise fast free logic. I think overlay device capability reporting already discussed a few times, the question is if an overlay devices should claim a feature when it depends on underlay devices? Given that no ack/review given to the patch, I am updating it as rejected. > > Signed-off-by: Ivan Malov > Signed-off-by: Andrew Rybchenko > --- > doc/guides/nics/features/failsafe.ini | 1 + > drivers/net/failsafe/failsafe_ops.c | 1 + > 2 files changed, 2 insertions(+) > > diff --git a/doc/guides/nics/features/failsafe.ini > b/doc/guides/nics/features/failsafe.ini > index e3c4c08f2..b6f3dcee6 100644 > --- a/doc/guides/nics/features/failsafe.ini > +++ b/doc/guides/nics/features/failsafe.ini > @@ -7,6 +7,7 @@ > Link status = Y > Link status event= Y > Rx interrupt = Y > +Fast mbuf free = Y > Queue start/stop = Y > Runtime Rx queue setup = Y > Runtime Tx queue setup = Y > diff --git a/drivers/net/failsafe/failsafe_ops.c > b/drivers/net/failsafe/failsafe_ops.c > index 7f8bcd4c6..e3add404b 100644 > --- a/drivers/net/failsafe/failsafe_ops.c > +++ b/drivers/net/failsafe/failsafe_ops.c > @@ -78,6 +78,7 @@ static struct rte_eth_dev_info default_infos = { > DEV_RX_OFFLOAD_SECURITY, > .tx_offload_capa = > DEV_TX_OFFLOAD_MULTI_SEGS | > + DEV_TX_OFFLOAD_MBUF_FAST_FREE | > DEV_TX_OFFLOAD_IPV4_CKSUM | > DEV_TX_OFFLOAD_UDP_CKSUM | > DEV_TX_OFFLOAD_TCP_CKSUM | >
Re: [dpdk-dev] [PATCH 18.11] malloc: fix deadlock when using malloc stats
On 21-Dec-18 12:09 PM, Thomas Monjalon wrote: 04/12/2018 13:22, Anatoly Burakov: Currently, malloc statistics and external heap creation code use memory hotplug lock as a way to synchronize accesses to heaps (as in, locking the hotplug lock to prevent list of heaps from changing under our feet). At the same time, malloc statistics code will also lock the heap because it needs to access heap data and does not want any other thread to allocate anything from that heap. In such scheme, it is possible to enter a deadlock with the following sequence of events: thread 1thread 2 rte_malloc() rte_malloc_dump_stats() take heap lock take hotplug lock failed to allocate, attempt to take hotplug lock attempt to take heap lock Neither thread will be able to continue, as both of them are waiting for the other one to drop the lock. Adding an additional lock will require an ABI change, so instead of that, make malloc statistics calls thread-unsafe with respect to creating/destroying heaps. Fixes: 72cf92b31855 ("malloc: index heaps using heap ID rather than NUMA node") Cc: sta...@dpdk.org Signed-off-by: Anatoly Burakov --- Notes: IMO this is the best we can do for 18.11 without breaking ABI. For 19.02, we can introduce a new global heap lock (something i should've done in the first place...), so this patch is not applicable to 19.02. For 19.02, we can fix this properly by introducing another lock and breaking the EAL ABI. Not sure where to put docs update, feedback welcome. This patch is also changing the API, because functions become not thread-safe. I think you should note it in the release notes. About 19.02, do we want to take this patch (with release notes updated)? Yes and yes. Technically, they still are thread-safe when it comes to individual heap access - they just aren't thread-safe with regards to creating/destroying heaps (so, we may enter the dump function, and a heap may be added/removed while we're iterating over the list of heaps). I'll send a v2 with release notes update. -- Thanks, Anatoly
Re: [dpdk-dev] [PATCH v4 05/10] ipsec: add SA data-path API
On 12/20/2018 3:47 PM, Ananyev, Konstantin wrote: > >>> + * @param ss >>> + * Pointer to the *rte_ipsec_session* object >>> + * @return >>> + * - Zero if operation completed successfully. >>> + * - -EINVAL if the parameters are invalid. >>> + */ >>> +int __rte_experimental >>> +rte_ipsec_session_prepare(struct rte_ipsec_session *ss); >>> + >>> +/** >>> + * For input mbufs and given IPsec session prepare crypto ops that can be >>> + * enqueued into the cryptodev associated with given session. >>> + * expects that for each input packet: >>> + * - l2_len, l3_len are setup correctly >>> + * Note that erroneous mbufs are not freed by the function, >>> + * but are placed beyond last valid mbuf in the *mb* array. >>> + * It is a user responsibility to handle them further. >> How will the user know how many mbufs are correctly processed. > Function return value contains number of successfully processed packets, > see comments below. > As an example, let say at input mb[]={A, B, C, D}, num=4, and prepare() > was able to successfully process A, B, D mbufs but failed to process C. > Then return value will be 3, and mb[]={A, B, D, C}. Wouldn't that hit performance, movement of mbufs. Can we leverage the crypto_op->status field. It is just a thought.
[dpdk-dev] [PATCH] net/sfc: pass HW Tx queue index on creation
Software indexes are PMD internal and should not be passed outside. Right now SW and HW indexes of the Tx queue match, so it is just a cosmetic fix. Fixes: dbdc82416b72 ("net/sfc: factor out libefx-based Tx datapath") Cc: sta...@dpdk.org Signed-off-by: Andrew Rybchenko --- drivers/net/sfc/sfc_tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/sfc/sfc_tx.c b/drivers/net/sfc/sfc_tx.c index 147f93365..cce823954 100644 --- a/drivers/net/sfc/sfc_tx.c +++ b/drivers/net/sfc/sfc_tx.c @@ -451,7 +451,7 @@ sfc_tx_qstart(struct sfc_adapter *sa, unsigned int sw_index) if (txq->offloads & DEV_TX_OFFLOAD_TCP_TSO) flags |= EFX_TXQ_FATSOV2; - rc = efx_tx_qcreate(sa->nic, sw_index, 0, &txq->mem, + rc = efx_tx_qcreate(sa->nic, txq->hw_index, 0, &txq->mem, txq_info->entries, 0 /* not used on EF10 */, flags, evq->common, &txq->common, &desc_index); -- 2.17.1
[dpdk-dev] [PATCH ] net/avf/base: fix comment referencing internal data
DCR is Intel internal information, no need to be in public code. See also a parallel patch by Ferruh, commit 1a0833efde70 ("net/i40e/base: fix comment referencing internal data"). Fixes: e5b2a9e957e7 ("net/avf/base: add base code for avf PMD") Cc: sta...@dpdk.org Signed-off-by: Rami Rosen --- drivers/net/avf/base/avf_adminq_cmd.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/avf/base/avf_adminq_cmd.h b/drivers/net/avf/base/avf_adminq_cmd.h index 1709f317e..795491187 100644 --- a/drivers/net/avf/base/avf_adminq_cmd.h +++ b/drivers/net/avf/base/avf_adminq_cmd.h @@ -1435,8 +1435,7 @@ struct avf_aqc_add_remove_cloud_filters_element_data { }; /* avf_aqc_add_rm_cloud_filt_elem_ext is used when - * AVF_AQC_ADD_REM_CLOUD_CMD_BIG_BUFFER flag is set. refer to - * DCR288 + * AVF_AQC_ADD_REM_CLOUD_CMD_BIG_BUFFER flag is set. */ struct avf_aqc_add_rm_cloud_filt_elem_ext { struct avf_aqc_add_remove_cloud_filters_element_data element; -- 2.19.2
Re: [dpdk-dev] [PATCH v2 3/3] net/af_packet: get 'framesz' from the iface's MTU
On 12/17/2018 9:21 AM, Lam, Tiago wrote: > Hi Ferruh, > > On 28/11/2018 13:33, Ferruh Yigit wrote: >> On 11/28/2018 1:12 PM, Lam, Tiago wrote: >>> On 27/11/2018 17:43, Ferruh Yigit wrote: On 11/20/2018 10:26 AM, Tiago Lam wrote: > Use the underlying MTU to calculate the framsize to be used for the mmap > RINGs. This is to make it more flexible on deployments with different > MTU requirements, instead of using a pre-defined value of 2048B. This behavior change should be documented in af_packet documentation which is missing unfortunately. Would you able to introduce the initial/basic af_packet doc to at least to document device argument? If not please let me know, I can work on it. >>> >>> Thanks for the review, Ferruh. >>> >>> Yeah, I don't mind cooking something up and submitting here for review; >>> I'll wait a couple of days for a reply from John W. before proceeding, >>> though. >> >> Thanks, appreciated. Agreed to wait a little. >> >>> >>> But given there's no documentation for af_packet yet, do you prefer to >>> wait for that to be available, and apply it all together? Or could that >>> be applied later as part of another patch? >> >> Both are OK, depends on your availability. >> >> I think it is better, to show the history, first patch as the documentation >> patch for existing behavior and your patch updating framsz usage (3/3) to >> update >> that document as well. > > As agreed, I just sent a patch with an initial take on adding some docs > for af_packet. Once that's in I'll submit another revision of this > patchset, including an update to the documentation. > > Just an aside, patch 1/3 of this series is a bugfix, it could go in > irrespective of the documentation, it seems. Agreed. Doc patch is merged, I will get first patch and will wait for a new version for next.
[dpdk-dev] [PATCH 19.02 v2] malloc: fix deadlock when using malloc stats
Currently, malloc statistics and external heap creation code use memory hotplug lock as a way to synchronize accesses to heaps (as in, locking the hotplug lock to prevent list of heaps from changing under our feet). At the same time, malloc statistics code will also lock the heap because it needs to access heap data and does not want any other thread to allocate anything from that heap. In such scheme, it is possible to enter a deadlock with the following sequence of events: thread 1thread 2 rte_malloc() rte_malloc_dump_stats() take heap lock take hotplug lock failed to allocate, attempt to take hotplug lock attempt to take heap lock Neither thread will be able to continue, as both of them are waiting for the other one to drop the lock. Adding an additional lock will require an ABI change, so instead of that, make malloc statistics calls thread-unsafe with respect to creating/destroying heaps. Fixes: 72cf92b31855 ("malloc: index heaps using heap ID rather than NUMA node") Cc: sta...@dpdk.org Signed-off-by: Anatoly Burakov --- Notes: This is the best we can do for 19.02 without breaking ABI. doc/guides/rel_notes/release_19_02.rst | 4 lib/librte_eal/common/include/rte_malloc.h | 9 + lib/librte_eal/common/rte_malloc.c | 19 +++ 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/doc/guides/rel_notes/release_19_02.rst b/doc/guides/rel_notes/release_19_02.rst index 47768288a..0b248d55d 100644 --- a/doc/guides/rel_notes/release_19_02.rst +++ b/doc/guides/rel_notes/release_19_02.rst @@ -126,6 +126,10 @@ API Changes - In cases where memfd support would have been required to provide segment fd's (such as in-memory or no-huge mode) +* eal: Functions ``rte_malloc_dump_stats()``, ``rte_malloc_dump_heaps()`` and + ``rte_malloc_get_socket_stats()`` are no longer safe to call concurrently with + ``rte_malloc_heap_create()`` or ``rte_malloc_heap_destroy()`` function calls. + * pdump: The ``rte_pdump_set_socket_dir()``, the parameter ``path`` of ``rte_pdump_init()`` and enum ``rte_pdump_socktype`` were deprecated since 18.05 and are removed in this release. diff --git a/lib/librte_eal/common/include/rte_malloc.h b/lib/librte_eal/common/include/rte_malloc.h index a5290b074..54a12467a 100644 --- a/lib/librte_eal/common/include/rte_malloc.h +++ b/lib/librte_eal/common/include/rte_malloc.h @@ -251,6 +251,9 @@ rte_malloc_validate(const void *ptr, size_t *size); /** * Get heap statistics for the specified heap. * + * @note This function is not thread-safe with respect to + *``rte_malloc_heap_create()``/``rte_malloc_heap_destroy()`` functions. + * * @param socket * An unsigned integer specifying the socket to get heap statistics for * @param socket_stats @@ -461,6 +464,9 @@ rte_malloc_heap_socket_is_external(int socket_id); * Dump for the specified type to a file. If the type argument is * NULL, all memory types will be dumped. * + * @note This function is not thread-safe with respect to + *``rte_malloc_heap_create()``/``rte_malloc_heap_destroy()`` functions. + * * @param f * A pointer to a file for output * @param type @@ -473,6 +479,9 @@ rte_malloc_dump_stats(FILE *f, const char *type); /** * Dump contents of all malloc heaps to a file. * + * @note This function is not thread-safe with respect to + *``rte_malloc_heap_create()``/``rte_malloc_heap_destroy()`` functions. + * * @param f * A pointer to a file for output */ diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c index 09051c236..b39de3c99 100644 --- a/lib/librte_eal/common/rte_malloc.c +++ b/lib/librte_eal/common/rte_malloc.c @@ -156,20 +156,14 @@ rte_malloc_get_socket_stats(int socket, struct rte_malloc_socket_stats *socket_stats) { struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; - int heap_idx, ret = -1; - - rte_rwlock_read_lock(&mcfg->memory_hotplug_lock); + int heap_idx; heap_idx = malloc_socket_to_heap_id(socket); if (heap_idx < 0) - goto unlock; + return -1; - ret = malloc_heap_get_stats(&mcfg->malloc_heaps[heap_idx], + return malloc_heap_get_stats(&mcfg->malloc_heaps[heap_idx], socket_stats); -unlock: - rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock); - - return ret; } /* @@ -181,14 +175,10 @@ rte_malloc_dump_heaps(FILE *f) struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; unsigned int idx; - rte_rwlock_read_lock(&mcfg->memory_hotplug_lock); - for (idx = 0; idx < RTE_MAX_HEAPS; idx++) { fprintf(f, "Heap id: %u\n", idx); malloc_heap_dump(&mcfg->malloc_heaps[idx], f); } - - rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock); } int @@ -26
Re: [dpdk-dev] net/failsafe: add default Tx mbuf fast free capability
On 12/21/18 3:12 PM, Ferruh Yigit wrote: On 10/12/2018 12:36 PM, Andrew Rybchenko wrote: From: Ivan Malov This capability is reported when supported by the current emitting sub-device. Failsafe PMD itself does not excercise fast free logic. I think overlay device capability reporting already discussed a few times, the question is if an overlay devices should claim a feature when it depends on underlay devices? The capability may be reported by the failsafe since it is transparent from fast free logic point of view. Given that no ack/review given to the patch, I am updating it as rejected. Is it a new policy? I thought that it was vice versa before. Signed-off-by: Ivan Malov Signed-off-by: Andrew Rybchenko --- doc/guides/nics/features/failsafe.ini | 1 + drivers/net/failsafe/failsafe_ops.c | 1 + 2 files changed, 2 insertions(+) diff --git a/doc/guides/nics/features/failsafe.ini b/doc/guides/nics/features/failsafe.ini index e3c4c08f2..b6f3dcee6 100644 --- a/doc/guides/nics/features/failsafe.ini +++ b/doc/guides/nics/features/failsafe.ini @@ -7,6 +7,7 @@ Link status = Y Link status event= Y Rx interrupt = Y +Fast mbuf free = Y Queue start/stop = Y Runtime Rx queue setup = Y Runtime Tx queue setup = Y diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c index 7f8bcd4c6..e3add404b 100644 --- a/drivers/net/failsafe/failsafe_ops.c +++ b/drivers/net/failsafe/failsafe_ops.c @@ -78,6 +78,7 @@ static struct rte_eth_dev_info default_infos = { DEV_RX_OFFLOAD_SECURITY, .tx_offload_capa = DEV_TX_OFFLOAD_MULTI_SEGS | + DEV_TX_OFFLOAD_MBUF_FAST_FREE | DEV_TX_OFFLOAD_IPV4_CKSUM | DEV_TX_OFFLOAD_UDP_CKSUM | DEV_TX_OFFLOAD_TCP_CKSUM |
Re: [dpdk-dev] [PATCH v2 1/3] net/af_packet: set_mtu() decrements sockaddr twice
On 11/20/2018 10:26 AM, Tiago Lam wrote: > When setting the MTU, eth_dev_mtu_set() is called to validate the > provided MTU. As part of that, it calculates the useful area to store > data and compares it against the MTU, to guarantee that there's enough > space to store the data. It calculates that as: > "tp_frame_size - TPACKET2_HDRLEN - sizeof(struct sockaddr_ll)" > > However, the TPACKET2_HDRLEN macro already increaments sizeof(struct > sockaddr_ll) internally, meaning the useuful area of data above will > have sizeof(struct sockaddr_ll) decremented twice. > > Instead, the useful area of data should be calculated as: > "tp_frame_size - TPACKET2_HDRLEN" > > This makes sure that there's enough useful area to fit the provided MTU > after excluding tpacket2_hdr and sockaddr_ll. > > Fixes: cc68ac4 ("net/af_packet: support MTU change") > > Signed-off-by: Tiago Lam Reviewed-by: Ferruh Yigit Applied to dpdk-next-net/master, thanks.
Re: [dpdk-dev] net/tap: fix possible uninitialized variable access
On 11/5/2018 3:31 PM, Ferruh Yigit wrote: > Fixes: 7c25284e30c2 ("net/tap: add netlink back-end for flow API") > Cc: sta...@dpdk.org > > Signed-off-by: Ferruh Yigit Applied to dpdk-next-net/master, thanks. (I guess it was trivial enough to not get any response, merged as it is)
Re: [dpdk-dev] [PATCH v4 06/10] ipsec: implement SA data-path API
On 12/20/2018 6:26 PM, Ananyev, Konstantin wrote: > >>> diff --git a/lib/librte_ipsec/crypto.h b/lib/librte_ipsec/crypto.h >>> new file mode 100644 >>> index 0..61f5c1433 >>> --- /dev/null >>> +++ b/lib/librte_ipsec/crypto.h >>> @@ -0,0 +1,123 @@ >>> +/* SPDX-License-Identifier: BSD-3-Clause >>> + * Copyright(c) 2018 Intel Corporation >>> + */ >>> + >>> +#ifndef _CRYPTO_H_ >>> +#define _CRYPTO_H_ >>> + >>> +/** >>> + * @file crypto.h >>> + * Contains crypto specific functions/structures/macros used internally >>> + * by ipsec library. >>> + */ >>> + >>> + /* >>> + * AES-GCM devices have some specific requirements for IV and AAD formats. >>> + * Ideally that to be done by the driver itself. >>> + */ >> I believe these can be moved to rte_crypto_sym.h. All crypto related >> stuff should be at same place. > Not sure what exactly you suggest to put into rte_crypto_sym.h? > struct aead_gcm_iv? Something else? > From my perspective it would be good if user in ctypto_sym_op > just fill salt and IV fields, and then PMD setup things in needed > format internally. > Again it would be really good if crypto_sym_op has reserved space > for AAD... > But all that implies quite a big change in cryptodev and PMDs, > so I think should be subject of a separate patch. > >>> + >>> +struct aead_gcm_iv { >>> + uint32_t salt; >>> + uint64_t iv; >>> + uint32_t cnt; >>> +} __attribute__((packed)); >>> + >>> +struct aead_gcm_aad { >>> + uint32_t spi; >>> + /* >>> +* RFC 4106, section 5: >>> +* Two formats of the AAD are defined: >>> +* one for 32-bit sequence numbers, and one for 64-bit ESN. >>> +*/ >>> + union { >>> + uint32_t u32[2]; >>> + uint64_t u64; >>> + } sqn; >>> + uint32_t align0; /* align to 16B boundary */ >>> +} __attribute__((packed)); >>> + >>> +struct gcm_esph_iv { >>> + struct esp_hdr esph; >>> + uint64_t iv; >>> +} __attribute__((packed)); >>> + >>> + >>> +static inline void >>> +aead_gcm_iv_fill(struct aead_gcm_iv *gcm, uint64_t iv, uint32_t salt) >>> +{ >>> + gcm->salt = salt; >>> + gcm->iv = iv; >>> + gcm->cnt = rte_cpu_to_be_32(1); >>> +} >>> + >>> +/* > >>> diff --git a/lib/librte_ipsec/iph.h b/lib/librte_ipsec/iph.h >>> new file mode 100644 >>> index 0..3fd93016d >>> --- /dev/null >>> +++ b/lib/librte_ipsec/iph.h >>> @@ -0,0 +1,84 @@ >>> +/* SPDX-License-Identifier: BSD-3-Clause >>> + * Copyright(c) 2018 Intel Corporation >>> + */ >>> + >>> +#ifndef _IPH_H_ >>> +#define _IPH_H_ >>> + >>> +/** >>> + * @file iph.h >>> + * Contains functions/structures/macros to manipulate IPv/IPv6 headers >> IPv4 >>> + * used internally by ipsec library. >>> + */ >>> + >>> +/* >>> + * Move preceding (L3) headers down to remove ESP header and IV. >>> + */ >> why cant we use rte_mbuf APIs to append/prepend/trim/adjust lengths. > We do use rte_mbuf append/trim, etc. adjust mbuf's data_ofs and data_len. > But apart from that for transport mode we have to move actual packet headers. > Let say for inbound we have to get rid of ESP header (which is after IP > header), > but preserve IP header, so we moving L2/L3 headers down, overwriting ESP > header. ok got your point >> I believe these adjustments are happening in the mbuf itself. >> Moreover these APIs are not specific to esp headers. > I didn't get your last sentence: that function is used to remove esp header > (see above) - that's why I named it that way. These can be used to remove any header and not specifically esp. So this API could be generic in rte_mbuf. > >>> +static inline void >>> +remove_esph(char *np, char *op, uint32_t hlen) >>> +{ >>> + uint32_t i; >>> + >>> + for (i = hlen; i-- != 0; np[i] = op[i]) >>> + ; >>> +} >>> + >>> +/* > >>> + >>> +/* update original and new ip header fields for tunnel case */ >>> +static inline void >>> +update_tun_l3hdr(const struct rte_ipsec_sa *sa, void *p, uint32_t plen, >>> + uint32_t l2len, rte_be16_t pid) >>> +{ >>> + struct ipv4_hdr *v4h; >>> + struct ipv6_hdr *v6h; >>> + >>> + if (sa->type & RTE_IPSEC_SATP_MODE_TUNLV4) { >>> + v4h = p; >>> + v4h->packet_id = pid; >>> + v4h->total_length = rte_cpu_to_be_16(plen - l2len); >> where are we updating the rest of the fields, like ttl, checksum, ip >> addresses, etc > TTL, ip addresses and other fileds supposed to be setuped by user > and provided via rte_ipsec_sa_init(): > struct rte_ipsec_sa_prm.tun.hdr should contain prepared template > for L3(and L2 if user wants to) header. > Checksum calculation is not done inside the lib right now - > it is a user responsibility to caclucate/set it after librte_ipsec > finishes processing the packet. I believe static fields are updated during sa init but some fields like ttl and checksum, can be updated in the library itself which is updated for every packet. (https://tools.ietf.org/html/rfc1624) > >>> + } else { >>> + v6h = p; >>> + v6h->payload_len = rte_cpu_to_be_16(plen - l2
Re: [dpdk-dev] [PATCH v4 08/10] ipsec: helper functions to group completed crypto-ops
On 12/20/2018 6:30 PM, Ananyev, Konstantin wrote: > >>> + >>> +/** >>> + * Take crypto-op as an input and extract pointer to related ipsec session. >>> + * @param cop >>> + * The address of an input *rte_crypto_op* structure. >>> + * @return >>> + * The pointer to the related *rte_ipsec_session* structure. >>> + */ >>> +static inline __rte_experimental struct rte_ipsec_session * >>> +rte_ipsec_ses_from_crypto(const struct rte_crypto_op *cop) >> __rte_experimental placement not correct > You mean why not: > static inline struct rte_ipsec_session * __rte_experimental > ? yes > Then checkpatch will complain about the space after '*'. ok > BTW why do you think current definition is wrong? this is how it is being used in the rest of the code. > >>> +{ >>> + const struct rte_security_session *ss; >>> + const struct rte_cryptodev_sym_session *cs; >>> + >>> + if (cop->sess_type == RTE_CRYPTO_OP_SECURITY_SESSION) { >>> + ss = cop->sym[0].sec_session; >>> + return (void *)(uintptr_t)ss->opaque_data; >>> + } else if (cop->sess_type == RTE_CRYPTO_OP_WITH_SESSION) { >>> + cs = cop->sym[0].session; >>> + return (void *)(uintptr_t)cs->opaque_data; >>> + } >>> + return NULL; >>> +} >>> +
Re: [dpdk-dev] [PATCH v4 09/10] test/ipsec: introduce functional test
On 12/20/2018 6:33 PM, Ananyev, Konstantin wrote: > >> >> On 12/14/2018 9:53 PM, Konstantin Ananyev wrote: >>> +static struct unit_test_suite ipsec_testsuite = { >>> + .suite_name = "IPsec NULL Unit Test Suite", >>> + .setup = testsuite_setup, >>> + .teardown = testsuite_teardown, >>> + .unit_test_cases = { >>> + TEST_CASE_ST(ut_setup, ut_teardown, >>> + test_ipsec_crypto_inb_burst_null_null_wrapper), >>> + TEST_CASE_ST(ut_setup, ut_teardown, >>> + test_ipsec_crypto_outb_burst_null_null_wrapper), >>> + TEST_CASE_ST(ut_setup, ut_teardown, >>> + test_ipsec_inline_inb_burst_null_null_wrapper), >>> + TEST_CASE_ST(ut_setup, ut_teardown, >>> + test_ipsec_inline_outb_burst_null_null_wrapper), >>> + TEST_CASE_ST(ut_setup, ut_teardown, >>> + test_ipsec_replay_inb_inside_null_null_wrapper), >>> + TEST_CASE_ST(ut_setup, ut_teardown, >>> + test_ipsec_replay_inb_outside_null_null_wrapper), >>> + TEST_CASE_ST(ut_setup, ut_teardown, >>> + test_ipsec_replay_inb_repeat_null_null_wrapper), >>> + TEST_CASE_ST(ut_setup, ut_teardown, >>> + test_ipsec_replay_inb_inside_burst_null_null_wrapper), >>> + TEST_CASE_ST(ut_setup, ut_teardown, >>> + test_ipsec_crypto_inb_burst_2sa_null_null_wrapper), >>> + TEST_CASE_ST(ut_setup, ut_teardown, >>> + test_ipsec_crypto_inb_burst_2sa_4grp_null_null_wrapper), >>> + TEST_CASES_END() /**< NULL terminate unit test array */ >>> + } >>> +}; >>> + >> test case for lookaside proto and inline proto case should also be added >> here. > Do you mean one with dummy security context and session as we done for > inline-crypto here? > Konstantin yes.
Re: [dpdk-dev] [PATCH v4 04/10] lib: introduce ipsec library
> -Original Message- > From: Akhil Goyal [mailto:akhil.go...@nxp.com] > Sent: Friday, December 21, 2018 11:53 AM > To: Ananyev, Konstantin ; dev@dpdk.org > Cc: Thomas Monjalon ; Awal, Mohammad Abdul > > Subject: Re: [dpdk-dev] [PATCH v4 04/10] lib: introduce ipsec library > > > > On 12/20/2018 7:36 PM, Ananyev, Konstantin wrote: > > > >>> diff --git a/lib/librte_ipsec/sa.c b/lib/librte_ipsec/sa.c > >>> new file mode 100644 > >>> index 0..f927a82bf > >>> --- /dev/null > >>> +++ b/lib/librte_ipsec/sa.c > >>> @@ -0,0 +1,327 @@ > >>> +/* SPDX-License-Identifier: BSD-3-Clause > >>> + * Copyright(c) 2018 Intel Corporation > >>> + */ > >>> + > >>> +#include > >>> +#include > >>> +#include > >>> +#include > >>> + > >>> +#include "sa.h" > >>> +#include "ipsec_sqn.h" > >>> + > >>> +/* some helper structures */ > >>> +struct crypto_xform { > >>> + struct rte_crypto_auth_xform *auth; > >>> + struct rte_crypto_cipher_xform *cipher; > >>> + struct rte_crypto_aead_xform *aead; > >>> +}; > >> shouldn't this be union as aead cannot be with cipher and auth cases. > > That's used internally to collect/analyze xforms provided by > > prm->crypto_xform. > > > > > > >> extra line > >>> + > >>> + > >>> +static int > >>> +check_crypto_xform(struct crypto_xform *xform) > >>> +{ > >>> + uintptr_t p; > >>> + > >>> + p = (uintptr_t)xform->auth | (uintptr_t)xform->cipher; > >> what is the intent of this? > > It is used below to check that if aead is present both cipher and auth > > are not. > > > >>> + > >>> + /* either aead or both auth and cipher should be not NULLs */ > >>> + if (xform->aead) { > >>> + if (p) > >>> + return -EINVAL; > >>> + } else if (p == (uintptr_t)xform->auth) { > >>> + return -EINVAL; > >>> + } > >> This function does not look good. It will miss the case of cipher only > > Cipher only is not supported right now and I am not aware about plans > > to support it in future. > > If someone would like to add cipher onl,then yes he/she probably would > > have to update this function. > I know that cipher_only is not supported and nobody will support it in > case of ipsec. > My point is if somebody gives only auth or only cipher xform, then this > function would not be able to detect that case and will not return error. fill_crypto_xform() (the function below) will detect it and return an error: + if (xf->type == RTE_CRYPTO_SYM_XFORM_AUTH) { + if (xform->auth != NULL) + return -EINVAL; > > >>> + > >>> + return 0; > >>> +} > >>> + > >>> +static int > >>> +fill_crypto_xform(struct crypto_xform *xform, > >>> + const struct rte_ipsec_sa_prm *prm) > >>> +{ > >>> + struct rte_crypto_sym_xform *xf; > >>> + > >>> + memset(xform, 0, sizeof(*xform)); > >>> + > >>> + for (xf = prm->crypto_xform; xf != NULL; xf = xf->next) { > >>> + if (xf->type == RTE_CRYPTO_SYM_XFORM_AUTH) { > >>> + if (xform->auth != NULL) > >>> + return -EINVAL; > >>> + xform->auth = &xf->auth; > >>> + } else if (xf->type == RTE_CRYPTO_SYM_XFORM_CIPHER) { > >>> + if (xform->cipher != NULL) > >>> + return -EINVAL; > >>> + xform->cipher = &xf->cipher; > >>> + } else if (xf->type == RTE_CRYPTO_SYM_XFORM_AEAD) { > >>> + if (xform->aead != NULL) > >>> + return -EINVAL; > >>> + xform->aead = &xf->aead; > >>> + } else > >>> + return -EINVAL; > >>> + } > >>> + > >>> + return check_crypto_xform(xform); > >>> +} > >> how is this function handling the inbound and outbound cases. > >> In inbound first xform is auth and then cipher. > >> In outbound first is cipher and then auth. I think this should be > >> checked in the lib. > > Interesting, I didn't know about such limitation. > > My understanding was that the any order (, ) > > for both inbound and outbound is acceptable. > > Is that order restriction is documented somewhere? > /** > * Symmetric crypto transform structure. > * > * This is used to specify the crypto transforms required, multiple > transforms > * can be chained together to specify a chain transforms such as > authentication > * then cipher, or cipher then authentication. Each transform structure can > * hold a single transform, the type field is used to specify which > transform > * is contained within the union > */ > struct rte_crypto_sym_xform { Yes, I read this but I don't see where it says that order of xforms implicitly defines order of operations for that session within crypto-dev. Or is it just me? I don't mind to add extra check here, just want to be sure it is really required for crypto PMD to work correctly. > > This is not a limitation, this is how it is designed to handle 2 cases > of crypto - auth then cipher and cipher then auth. > Ok, if you sure it is a valid check - I'll add
Re: [dpdk-dev] net/failsafe: add default Tx mbuf fast free capability
On 12/21/2018 12:28 PM, Andrew Rybchenko wrote: > On 12/21/18 3:12 PM, Ferruh Yigit wrote: >> On 10/12/2018 12:36 PM, Andrew Rybchenko wrote: >>> From: Ivan Malov >>> >>> This capability is reported when supported by the current emitting >>> sub-device. Failsafe PMD itself does not excercise fast free logic. >> I think overlay device capability reporting already discussed a few times, >> the >> question is if an overlay devices should claim a feature when it depends on >> underlay devices? > > The capability may be reported by the failsafe since it is transparent from > fast free logic point of view. Why it is transparent? If one of the underlying device doesn't support/claim this feature, application can't use this feature with failsafe, isn't it? > >> Given that no ack/review given to the patch, I am updating it as rejected. > > Is it a new policy? I thought that it was vice versa before. Hi Andrew, Yes policy is other-way around indeed, when there is no comment at all default behavior is accept, but please take above paragraph as my comment to the patch. And I was thinking it is a little controversial and there is no support to have it, so lets don't get it. What do you think? > >>> Signed-off-by: Ivan Malov >>> Signed-off-by: Andrew Rybchenko >>> --- >>> doc/guides/nics/features/failsafe.ini | 1 + >>> drivers/net/failsafe/failsafe_ops.c | 1 + >>> 2 files changed, 2 insertions(+) >>> >>> diff --git a/doc/guides/nics/features/failsafe.ini >>> b/doc/guides/nics/features/failsafe.ini >>> index e3c4c08f2..b6f3dcee6 100644 >>> --- a/doc/guides/nics/features/failsafe.ini >>> +++ b/doc/guides/nics/features/failsafe.ini >>> @@ -7,6 +7,7 @@ >>> Link status = Y >>> Link status event= Y >>> Rx interrupt = Y >>> +Fast mbuf free = Y >>> Queue start/stop = Y >>> Runtime Rx queue setup = Y >>> Runtime Tx queue setup = Y >>> diff --git a/drivers/net/failsafe/failsafe_ops.c >>> b/drivers/net/failsafe/failsafe_ops.c >>> index 7f8bcd4c6..e3add404b 100644 >>> --- a/drivers/net/failsafe/failsafe_ops.c >>> +++ b/drivers/net/failsafe/failsafe_ops.c >>> @@ -78,6 +78,7 @@ static struct rte_eth_dev_info default_infos = { >>> DEV_RX_OFFLOAD_SECURITY, >>> .tx_offload_capa = >>> DEV_TX_OFFLOAD_MULTI_SEGS | >>> + DEV_TX_OFFLOAD_MBUF_FAST_FREE | >>> DEV_TX_OFFLOAD_IPV4_CKSUM | >>> DEV_TX_OFFLOAD_UDP_CKSUM | >>> DEV_TX_OFFLOAD_TCP_CKSUM | >>> >
Re: [dpdk-dev] net/failsafe: add default Tx mbuf fast free capability
Hi Ferruh, On 12/21/18 3:43 PM, Ferruh Yigit wrote: On 12/21/2018 12:28 PM, Andrew Rybchenko wrote: On 12/21/18 3:12 PM, Ferruh Yigit wrote: On 10/12/2018 12:36 PM, Andrew Rybchenko wrote: From: Ivan Malov This capability is reported when supported by the current emitting sub-device. Failsafe PMD itself does not excercise fast free logic. I think overlay device capability reporting already discussed a few times, the question is if an overlay devices should claim a feature when it depends on underlay devices? The capability may be reported by the failsafe since it is transparent from fast free logic point of view. Why it is transparent? If one of the underlying device doesn't support/claim this feature, application can't use this feature with failsafe, isn't it? tx_offload_capa in failsafe is a mask to apply on sub-device capabilities. So, if the capability is not supported by any sub-device it will not be reported. As well if there is the capability bit in the mask, it will not be reported regardless sub-devices capabilities. The description for the patch above tries to explain it - it looks like not that successful. Given that no ack/review given to the patch, I am updating it as rejected. Is it a new policy? I thought that it was vice versa before. Hi Andrew, Yes policy is other-way around indeed, when there is no comment at all default behavior is accept, but please take above paragraph as my comment to the patch. Got it. And I was thinking it is a little controversial and there is no support to have it, so lets don't get it. What do you think? I see you motivation. Signed-off-by: Ivan Malov Signed-off-by: Andrew Rybchenko --- doc/guides/nics/features/failsafe.ini | 1 + drivers/net/failsafe/failsafe_ops.c | 1 + 2 files changed, 2 insertions(+) diff --git a/doc/guides/nics/features/failsafe.ini b/doc/guides/nics/features/failsafe.ini index e3c4c08f2..b6f3dcee6 100644 --- a/doc/guides/nics/features/failsafe.ini +++ b/doc/guides/nics/features/failsafe.ini @@ -7,6 +7,7 @@ Link status = Y Link status event= Y Rx interrupt = Y +Fast mbuf free = Y Queue start/stop = Y Runtime Rx queue setup = Y Runtime Tx queue setup = Y diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c index 7f8bcd4c6..e3add404b 100644 --- a/drivers/net/failsafe/failsafe_ops.c +++ b/drivers/net/failsafe/failsafe_ops.c @@ -78,6 +78,7 @@ static struct rte_eth_dev_info default_infos = { DEV_RX_OFFLOAD_SECURITY, .tx_offload_capa = DEV_TX_OFFLOAD_MULTI_SEGS | + DEV_TX_OFFLOAD_MBUF_FAST_FREE | DEV_TX_OFFLOAD_IPV4_CKSUM | DEV_TX_OFFLOAD_UDP_CKSUM | DEV_TX_OFFLOAD_TCP_CKSUM |
Re: [dpdk-dev] [PATCH v4 04/10] lib: introduce ipsec library
> > > > > > On 12/20/2018 7:36 PM, Ananyev, Konstantin wrote: > > > > > >>> diff --git a/lib/librte_ipsec/sa.c b/lib/librte_ipsec/sa.c > > >>> new file mode 100644 > > >>> index 0..f927a82bf > > >>> --- /dev/null > > >>> +++ b/lib/librte_ipsec/sa.c > > >>> @@ -0,0 +1,327 @@ > > >>> +/* SPDX-License-Identifier: BSD-3-Clause > > >>> + * Copyright(c) 2018 Intel Corporation > > >>> + */ > > >>> + > > >>> +#include > > >>> +#include > > >>> +#include > > >>> +#include > > >>> + > > >>> +#include "sa.h" > > >>> +#include "ipsec_sqn.h" > > >>> + > > >>> +/* some helper structures */ > > >>> +struct crypto_xform { > > >>> + struct rte_crypto_auth_xform *auth; > > >>> + struct rte_crypto_cipher_xform *cipher; > > >>> + struct rte_crypto_aead_xform *aead; > > >>> +}; > > >> shouldn't this be union as aead cannot be with cipher and auth cases. > > > That's used internally to collect/analyze xforms provided by > > > prm->crypto_xform. > > > > > > > > > > >> extra line > > >>> + > > >>> + > > >>> +static int > > >>> +check_crypto_xform(struct crypto_xform *xform) > > >>> +{ > > >>> + uintptr_t p; > > >>> + > > >>> + p = (uintptr_t)xform->auth | (uintptr_t)xform->cipher; > > >> what is the intent of this? > > > It is used below to check that if aead is present both cipher and auth > > > are not. > > > > > >>> + > > >>> + /* either aead or both auth and cipher should be not NULLs */ > > >>> + if (xform->aead) { > > >>> + if (p) > > >>> + return -EINVAL; > > >>> + } else if (p == (uintptr_t)xform->auth) { > > >>> + return -EINVAL; > > >>> + } > > >> This function does not look good. It will miss the case of cipher only > > > Cipher only is not supported right now and I am not aware about plans > > > to support it in future. > > > If someone would like to add cipher onl,then yes he/she probably would > > > have to update this function. > > I know that cipher_only is not supported and nobody will support it in > > case of ipsec. > > My point is if somebody gives only auth or only cipher xform, then this > > function would not be able to detect that case and will not return error. > > fill_crypto_xform() (the function below) will detect it and return an error: > + if (xf->type == RTE_CRYPTO_SYM_XFORM_AUTH) { > + if (xform->auth != NULL) > + return -EINVAL; Please ignore the comment above - was thinking about different thing. Yes extra check is needed for case when only cipher xform is provided.
Re: [dpdk-dev] [PATCH v4 10/10] doc: add IPsec library guide
On 12/20/2018 6:36 PM, Ananyev, Konstantin wrote: > >>> --- /dev/null >>> +++ b/doc/guides/prog_guide/ipsec_lib.rst >>> @@ -0,0 +1,74 @@ >>> +.. SPDX-License-Identifier: BSD-3-Clause >>> +Copyright(c) 2018 Intel Corporation. >>> + >>> +IPsec Packet Processing Library >>> +=== >>> + >>> +The DPDK provides a library for IPsec data-path processing. >>> +The library utilizes existing DPDK crypto-dev and >>> +security API to provide application with transparent and >>> +high peromant IPsec packet processing API. >>> +The library is concentrated on data-path protocols processing >>> +(ESP and AH), IKE protocol(s) implementation is out of scope >>> +for that library. >> I do not see AH processing in the library > Right now it is not implemented. > But the whole library code structure allows it to be added (if someone would > decide to). specify this here. >>> + >>> +SA level API >>> + >>> + >>> +This API operates on IPsec SA level. >>> +It provides functionality that allows user for given SA to process >>> +inbound and outbound IPsec packets. >>> +To be more specific: >>> +* for inbound ESP/AH packets perform decryption, authentication, >>> integrity checking, remove ESP/AH related headers >>> +* for outbound packets perform payload encryption, attach ICV, update/add >>> IP headers, add ESP/AH headers/trailers, >>> +* setup related mbuf felids (ol_flags, tx_offloads, etc.). >>> +* initialize/un-initialize given SA based on user provided parameters. >>> + >>> +SA-level API is based on top of crypto-dev/security API and relies on >>> +them to perform actual cipher and integrity checking. >>> + >>> +Due to the nature of crypto-dev API (enqueue/deque model) library >>> introduces >>> +asynchronous API for IPsec packets destined to be processed by >>> crypto-device. >>> + >>> +Expected API call sequence for data-path processing would be: >>> + >>> +.. code-block:: c >>> + >>> +/* enqueue for processing by crypto-device */ >>> +rte_ipsec_pkt_crypto_prepare(...); >>> +rte_cryptodev_enqueue_burst(...); >>> +/* dequeue from crypto-device and do final processing (if any) */ >>> +rte_cryptodev_dequeue_burst(...); >>> +rte_ipsec_pkt_crypto_group(...); /* optional */ >>> +rte_ipsec_pkt_process(...); >>> + >>> +For packets destined for inline processing no extra overhead >>> +is required and synchronous API call: rte_ipsec_pkt_process() >>> +is sufficient for that case. >>> + >>> +.. note:: >>> + >>> +For more details about the IPsec API, please refer to the *DPDK API >>> Reference*. >>> + >>> +Current implementation supports all four currently defined rte_security >>> types: >>> +* RTE_SECURITY_ACTION_TYPE_NONE >>> + >>> +* RTE_SECURITY_ACTION_TYPE_INLINE_CRYPTO >>> + >>> +* RTE_SECURITY_ACTION_TYPE_INLINE_PROTOCOL >>> + >>> +* RTE_SECURITY_ACTION_TYPE_LOOKASIDE_PROTOCOL >>> + >> probably a code flow diagram should be added and explained in detail for >> each of the action types > I think it is way above my drawing capabilities :) I think you can refer to http://doc.dpdk.org/guides/prog_guide/rte_security.html something similar to that would explain it in a better way. > >>> +To accommodate future custom implementations function pointers >>> +model is used for both for *crypto_prepare* and *process* >>> +impelementations. >>> + >>> +Supported features: >>> +* ESP protocol tunnel mode. >>> + >>> +* ESP protocol transport mode. >>> + >>> +* ESN and replay window. >>> + >>> +* algorithms: AES-CBC, AES-GCM, HMAC-SHA1, NULL. >> The supported features should be elaborated further more. > Ok, anything specific information you think has to be added here? Probably a few lines to explain the feature(very brief) and how it is implemented in ipsec lib and the limitation if any
Re: [dpdk-dev] net/failsafe: add default Tx mbuf fast free capability
Hi Ferruh, Andrew, On Fri, Dec 21, 2018 at 03:52:07PM +0300, Andrew Rybchenko wrote: > Hi Ferruh, > > On 12/21/18 3:43 PM, Ferruh Yigit wrote: > > On 12/21/2018 12:28 PM, Andrew Rybchenko wrote: > > > On 12/21/18 3:12 PM, Ferruh Yigit wrote: > > > > On 10/12/2018 12:36 PM, Andrew Rybchenko wrote: > > > > > From: Ivan Malov > > > > > > > > > > This capability is reported when supported by the current emitting > > > > > sub-device. Failsafe PMD itself does not excercise fast free logic. > > > > I think overlay device capability reporting already discussed a few > > > > times, the > > > > question is if an overlay devices should claim a feature when it > > > > depends on > > > > underlay devices? > > > The capability may be reported by the failsafe since it is transparent > > > from > > > fast free logic point of view. > > Why it is transparent? If one of the underlying device doesn't support/claim > > this feature, application can't use this feature with failsafe, isn't it? If a VF is forbidden by its PF from adding MAC addresses, the driver should still advertize support for "Unicast MAC filter" right? This is the same here. Fail-safe needs to forward configurations items to its sub-device for a feature to work. Sometimes, the hardware won't be sufficient. But the fail-safe itself still has the parts needed (even if it is only a flag to add to a feature list). It is necessary, at the fail-safe level, to be able to describe the current feature set. This is what the feature list is for. There are important caveats to consider however, which is inherent to using the fail-safe. It does not mean those features should be removed from the fail-safe feature list. > > tx_offload_capa in failsafe is a mask to apply on sub-device capabilities. > So, if the capability is not supported by any sub-device it will not be > reported. > As well if there is the capability bit in the mask, it will not be reported > regardless > sub-devices capabilities. The description for the patch above tries to > explain it - > it looks like not that successful. > > > > > Given that no ack/review given to the patch, I am updating it as > > > > rejected. > > > Is it a new policy? I thought that it was vice versa before. > > Hi Andrew, > > > > Yes policy is other-way around indeed, when there is no comment at all > > default > > behavior is accept, but please take above paragraph as my comment to the > > patch. > > Got it. > > > And I was thinking it is a little controversial and there is no support to > > have > > it, so lets don't get it. What do you think? > > I see you motivation. > > > > > > Signed-off-by: Ivan Malov > > > > > Signed-off-by: Andrew Rybchenko > > > > > --- > > > > >doc/guides/nics/features/failsafe.ini | 1 + > > > > >drivers/net/failsafe/failsafe_ops.c | 1 + > > > > >2 files changed, 2 insertions(+) > > > > > > > > > > diff --git a/doc/guides/nics/features/failsafe.ini > > > > > b/doc/guides/nics/features/failsafe.ini > > > > > index e3c4c08f2..b6f3dcee6 100644 > > > > > --- a/doc/guides/nics/features/failsafe.ini > > > > > +++ b/doc/guides/nics/features/failsafe.ini > > > > > @@ -7,6 +7,7 @@ > > > > >Link status = Y > > > > >Link status event= Y > > > > >Rx interrupt = Y > > > > > +Fast mbuf free = Y > > > > >Queue start/stop = Y > > > > >Runtime Rx queue setup = Y > > > > >Runtime Tx queue setup = Y > > > > > diff --git a/drivers/net/failsafe/failsafe_ops.c > > > > > b/drivers/net/failsafe/failsafe_ops.c > > > > > index 7f8bcd4c6..e3add404b 100644 > > > > > --- a/drivers/net/failsafe/failsafe_ops.c > > > > > +++ b/drivers/net/failsafe/failsafe_ops.c > > > > > @@ -78,6 +78,7 @@ static struct rte_eth_dev_info default_infos = { > > > > > DEV_RX_OFFLOAD_SECURITY, > > > > > .tx_offload_capa = > > > > > DEV_TX_OFFLOAD_MULTI_SEGS | > > > > > + DEV_TX_OFFLOAD_MBUF_FAST_FREE | > > > > > DEV_TX_OFFLOAD_IPV4_CKSUM | > > > > > DEV_TX_OFFLOAD_UDP_CKSUM | > > > > > DEV_TX_OFFLOAD_TCP_CKSUM | > > > > > > -- Gaëtan Rivet 6WIND
Re: [dpdk-dev] [PATCH] telemetry: fix error when using ports of different types
On 19/12/2018 11:59, Bruce Richardson wrote: Different NIC ports can have different numbers of xstats on them, which means that we can't just use the xstats list from the first port registered in the telemetry library. Instead, we need to check the type of each port - by checking its ops structure pointer - and register each port type once with the metrics lib. CC: sta...@dpdk.org Fixes: fdbdb3f9ce46 ("telemetry: add initial connection socket") Signed-off-by: Bruce Richardson Acked-by: Kevin Laatz
Re: [dpdk-dev] net/failsafe: add default Tx mbuf fast free capability
On 12/21/2018 12:52 PM, Andrew Rybchenko wrote: > Hi Ferruh, > > On 12/21/18 3:43 PM, Ferruh Yigit wrote: >> On 12/21/2018 12:28 PM, Andrew Rybchenko wrote: >>> On 12/21/18 3:12 PM, Ferruh Yigit wrote: On 10/12/2018 12:36 PM, Andrew Rybchenko wrote: > From: Ivan Malov > > This capability is reported when supported by the current emitting > sub-device. Failsafe PMD itself does not excercise fast free logic. I think overlay device capability reporting already discussed a few times, the question is if an overlay devices should claim a feature when it depends on underlay devices? >>> The capability may be reported by the failsafe since it is transparent from >>> fast free logic point of view. >> Why it is transparent? If one of the underlying device doesn't support/claim >> this feature, application can't use this feature with failsafe, isn't it? > > tx_offload_capa in failsafe is a mask to apply on sub-device capabilities. I missed this one, I see why it is transparent. Why failsafe doesn't set a full tx_offload_capa MASK but maintain a list? > So, if the capability is not supported by any sub-device it will not be > reported. > As well if there is the capability bit in the mask, it will not be > reported regardless > sub-devices capabilities. The description for the patch above tries to > explain it - > it looks like not that successful. > Given that no ack/review given to the patch, I am updating it as rejected. >>> Is it a new policy? I thought that it was vice versa before. >> Hi Andrew, >> >> Yes policy is other-way around indeed, when there is no comment at all >> default >> behavior is accept, but please take above paragraph as my comment to the >> patch. > > Got it. > >> And I was thinking it is a little controversial and there is no support to >> have >> it, so lets don't get it. What do you think? > > I see you motivation. > > Signed-off-by: Ivan Malov > Signed-off-by: Andrew Rybchenko > --- >doc/guides/nics/features/failsafe.ini | 1 + >drivers/net/failsafe/failsafe_ops.c | 1 + >2 files changed, 2 insertions(+) > > diff --git a/doc/guides/nics/features/failsafe.ini > b/doc/guides/nics/features/failsafe.ini > index e3c4c08f2..b6f3dcee6 100644 > --- a/doc/guides/nics/features/failsafe.ini > +++ b/doc/guides/nics/features/failsafe.ini > @@ -7,6 +7,7 @@ >Link status = Y >Link status event= Y >Rx interrupt = Y > +Fast mbuf free = Y >Queue start/stop = Y >Runtime Rx queue setup = Y >Runtime Tx queue setup = Y > diff --git a/drivers/net/failsafe/failsafe_ops.c > b/drivers/net/failsafe/failsafe_ops.c > index 7f8bcd4c6..e3add404b 100644 > --- a/drivers/net/failsafe/failsafe_ops.c > +++ b/drivers/net/failsafe/failsafe_ops.c > @@ -78,6 +78,7 @@ static struct rte_eth_dev_info default_infos = { > DEV_RX_OFFLOAD_SECURITY, > .tx_offload_capa = > DEV_TX_OFFLOAD_MULTI_SEGS | > + DEV_TX_OFFLOAD_MBUF_FAST_FREE | > DEV_TX_OFFLOAD_IPV4_CKSUM | > DEV_TX_OFFLOAD_UDP_CKSUM | > DEV_TX_OFFLOAD_TCP_CKSUM | > > >
Re: [dpdk-dev] [PATCH v4 00/10] ipsec: new library for IPsec data-path processing
Hi Konstantin, I am done with the review, will be running the code in early next week after I finish the review of changes in ipsec application. key points for review were - some code may be generic and can be moved in appropriate files - documentation update - spell checks spacing etc. - some cases like cipher only need to be looked appropriately - test cases for lookaside and inline proto - checksum/ttl update With these comments we cannot make this to RC1, but RC2 can be looked upon. Thanks, Akhil On 12/14/2018 9:59 PM, Konstantin Ananyev wrote: > This patch series depends on the patch: > http://patches.dpdk.org/patch/48044/ > to be applied first. > > v3 -> v4 > - Changes to adress Declan comments > - Update docs > > v2 -> v3 > - Several fixes for IPv6 support > - Extra checks for input parameters in public APi functions > > v1 -> v2 > - Changes to get into account l2_len for outbound transport packets > (Qi comments) > - Several bug fixes > - Some code restructured > - Update MAINTAINERS file > > RFCv2 -> v1 > - Changes per Jerin comments > - Implement transport mode > - Several bug fixes > - UT largely reworked and extended > > This patch introduces a new library within DPDK: librte_ipsec. > The aim is to provide DPDK native high performance library for IPsec > data-path processing. > The library is supposed to utilize existing DPDK crypto-dev and > security API to provide application with transparent IPsec > processing API. > The library is concentrated on data-path protocols processing > (ESP and AH), IKE protocol(s) implementation is out of scope > for that library. > Current patch introduces SA-level API. > > SA (low) level API > == > > API described below operates on SA level. > It provides functionality that allows user for given SA to process > inbound and outbound IPsec packets. > To be more specific: > - for inbound ESP/AH packets perform decryption, authentication, >integrity checking, remove ESP/AH related headers > - for outbound packets perform payload encryption, attach ICV, >update/add IP headers, add ESP/AH headers/trailers, >setup related mbuf felids (ol_flags, tx_offloads, etc.). > - initialize/un-initialize given SA based on user provided parameters. > > The following functionality: >- match inbound/outbound packets to particular SA >- manage crypto/security devices >- provide SAD/SPD related functionality >- determine what crypto/security device has to be used > for given packet(s) > is out of scope for SA-level API. > > SA-level API is based on top of crypto-dev/security API and relies on > them > to perform actual cipher and integrity checking. > To have an ability to easily map crypto/security sessions into related > IPSec SA opaque userdata field was added into > rte_cryptodev_sym_session and rte_security_session structures. > That implies ABI change for both librte_crytpodev and librte_security. > > Due to the nature of crypto-dev API (enqueue/deque model) we use > asynchronous API for IPsec packets destined to be processed > by crypto-device. > Expected API call sequence would be: >/* enqueue for processing by crypto-device */ >rte_ipsec_pkt_crypto_prepare(...); >rte_cryptodev_enqueue_burst(...); >/* dequeue from crypto-device and do final processing (if any) */ >rte_cryptodev_dequeue_burst(...); >rte_ipsec_pkt_crypto_group(...); /* optional */ >rte_ipsec_pkt_process(...); > > Though for packets destined for inline processing no extra overhead > is required and synchronous API call: rte_ipsec_pkt_process() > is sufficient for that case. > > Current implementation supports all four currently defined > rte_security types. > Though to accommodate future custom implementations function pointers > model is used for both for *crypto_prepare* and *process* > impelementations. > > Konstantin Ananyev (10): >cryptodev: add opaque userdata pointer into crypto sym session >security: add opaque userdata pointer into security session >net: add ESP trailer structure definition >lib: introduce ipsec library >ipsec: add SA data-path API >ipsec: implement SA data-path API >ipsec: rework SA replay window/SQN for MT environment >ipsec: helper functions to group completed crypto-ops >test/ipsec: introduce functional test >doc: add IPsec library guide > > MAINTAINERS|5 + > config/common_base |5 + > doc/guides/prog_guide/index.rst|1 + > doc/guides/prog_guide/ipsec_lib.rst| 74 + > doc/guides/rel_notes/release_19_02.rst | 10 + > lib/Makefile |2 + > lib/librte_cryptodev/rte_cryptodev.h |2 + > lib/librte_ipsec/Makefile | 27 + > lib/librte_ipsec/crypto.h | 123 ++ > lib/librte_ipsec/iph.h | 84 + > lib/librte_ipsec/ipsec_sqn.h | 343 > lib/librte_ipsec
Re: [dpdk-dev] [PATCH 19.02 v2] malloc: fix deadlock when using malloc stats
21/12/2018 13:26, Anatoly Burakov: > Currently, malloc statistics and external heap creation code > use memory hotplug lock as a way to synchronize accesses to > heaps (as in, locking the hotplug lock to prevent list of heaps > from changing under our feet). At the same time, malloc > statistics code will also lock the heap because it needs to > access heap data and does not want any other thread to allocate > anything from that heap. > > In such scheme, it is possible to enter a deadlock with the > following sequence of events: > > thread 1 thread 2 > rte_malloc() > rte_malloc_dump_stats() > take heap lock > take hotplug lock > failed to allocate, > attempt to take > hotplug lock > attempt to take heap lock > > Neither thread will be able to continue, as both of them are > waiting for the other one to drop the lock. Adding an > additional lock will require an ABI change, so instead of > that, make malloc statistics calls thread-unsafe with > respect to creating/destroying heaps. > > Fixes: 72cf92b31855 ("malloc: index heaps using heap ID rather than NUMA > node") > Cc: sta...@dpdk.org > > Signed-off-by: Anatoly Burakov > --- > > Notes: > This is the best we can do for 19.02 without breaking ABI. Applied, thanks
[dpdk-dev] [PATCH v3 0/2] cryptodev: change qp conf and sym session
This patchset changes the queue pair configure structure and sym session structure for cryptodev. An RFC patch http://patchwork.dpdk.org/patch/43883/ has been sent to state the change. v3: - Rebased. - Fixed a few bugs. v2: - Removed incorrect config/common_base change - Added new API for existing session size - Changed element name of symmetric session Fan Zhang (2): cryptodev: change queue pair configure structure cryptodev: change symmetric session structure app/test-crypto-perf/cperf.h | 1 + app/test-crypto-perf/cperf_ops.c | 11 +- app/test-crypto-perf/cperf_ops.h | 2 +- app/test-crypto-perf/cperf_test_latency.c | 5 +- app/test-crypto-perf/cperf_test_latency.h | 1 + app/test-crypto-perf/cperf_test_pmd_cyclecount.c | 5 +- app/test-crypto-perf/cperf_test_pmd_cyclecount.h | 1 + app/test-crypto-perf/cperf_test_throughput.c | 5 +- app/test-crypto-perf/cperf_test_throughput.h | 1 + app/test-crypto-perf/cperf_test_verify.c | 5 +- app/test-crypto-perf/cperf_test_verify.h | 1 + app/test-crypto-perf/main.c| 105 ++--- doc/guides/rel_notes/release_19_02.rst | 19 ++ drivers/crypto/aesni_gcm/Makefile | 1 + drivers/crypto/aesni_gcm/aesni_gcm_pmd.c | 10 +- drivers/crypto/aesni_gcm/aesni_gcm_pmd_ops.c | 5 +- drivers/crypto/aesni_gcm/aesni_gcm_pmd_private.h | 2 + drivers/crypto/aesni_gcm/meson.build | 1 + drivers/crypto/aesni_mb/Makefile | 1 + drivers/crypto/aesni_mb/meson.build| 2 + drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c | 10 +- drivers/crypto/aesni_mb/rte_aesni_mb_pmd_ops.c | 5 +- drivers/crypto/aesni_mb/rte_aesni_mb_pmd_private.h | 2 + drivers/crypto/armv8/Makefile | 1 + drivers/crypto/armv8/rte_armv8_pmd.c | 10 +- drivers/crypto/armv8/rte_armv8_pmd_ops.c | 5 +- drivers/crypto/armv8/rte_armv8_pmd_private.h | 2 + drivers/crypto/caam_jr/caam_jr.c | 3 +- drivers/crypto/ccp/ccp_pmd_ops.c | 5 +- drivers/crypto/ccp/ccp_pmd_private.h | 2 + drivers/crypto/ccp/rte_ccp_pmd.c | 9 +- drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c| 3 +- drivers/crypto/dpaa_sec/dpaa_sec.c | 3 +- drivers/crypto/kasumi/Makefile | 1 + drivers/crypto/kasumi/meson.build | 1 + drivers/crypto/kasumi/rte_kasumi_pmd.c | 10 +- drivers/crypto/kasumi/rte_kasumi_pmd_ops.c | 5 +- drivers/crypto/kasumi/rte_kasumi_pmd_private.h | 2 + drivers/crypto/mvsam/rte_mrvl_pmd_ops.c| 5 +- drivers/crypto/mvsam/rte_mrvl_pmd_private.h| 3 + drivers/crypto/null/null_crypto_pmd.c | 5 +- drivers/crypto/null/null_crypto_pmd_ops.c | 5 +- drivers/crypto/null/null_crypto_pmd_private.h | 2 + drivers/crypto/octeontx/otx_cryptodev_ops.c| 3 +- drivers/crypto/openssl/Makefile| 1 + drivers/crypto/openssl/meson.build | 1 + drivers/crypto/openssl/rte_openssl_pmd.c | 10 +- drivers/crypto/openssl/rte_openssl_pmd_ops.c | 5 +- drivers/crypto/openssl/rte_openssl_pmd_private.h | 2 + drivers/crypto/qat/qat_sym_pmd.c | 2 +- drivers/crypto/scheduler/scheduler_pmd_ops.c | 5 +- drivers/crypto/snow3g/Makefile | 1 + drivers/crypto/snow3g/rte_snow3g_pmd.c | 10 +- drivers/crypto/snow3g/rte_snow3g_pmd_ops.c | 5 +- drivers/crypto/snow3g/rte_snow3g_pmd_private.h | 2 + drivers/crypto/virtio/virtio_cryptodev.c | 6 +- drivers/crypto/zuc/Makefile| 1 + drivers/crypto/zuc/meson.build | 1 + drivers/crypto/zuc/rte_zuc_pmd.c | 10 +- drivers/crypto/zuc/rte_zuc_pmd_ops.c | 5 +- drivers/crypto/zuc/rte_zuc_pmd_private.h | 2 + drivers/net/softnic/rte_eth_softnic_cli.c | 52 +++- drivers/net/softnic/rte_eth_softnic_cryptodev.c| 52 +++- drivers/net/softnic/rte_eth_softnic_internals.h| 3 + examples/fips_validation/main.c| 38 ++- examples/ip_pipeline/cli.c | 49 ++-- examples/ip_pipeline/cryptodev.c | 51 +++- examples/ip_pipeline/cryptodev.h | 3 + examples/ip_pipeline/examples/flow_crypto.cli | 9 +- examples/ipsec-secgw/ipsec-secgw.c | 51 ++-- examples/ipsec-secgw/ipsec.c | 2 +- examples/ipsec-secgw/ipsec.h | 2 + examples/l2fwd-crypto/main.c | 66 -- examples/vhost_crypto/main.c | 22 +- lib/librte_cryptod
[dpdk-dev] [PATCH v3 1/2] cryptodev: change queue pair configure structure
This patch changes the cryptodev queue pair configure structure to enable two mempool passed into cryptodev PMD simutaneously. Signed-off-by: Fan Zhang Acked-by: Fiona Trahe --- app/test-crypto-perf/main.c| 6 ++-- doc/guides/rel_notes/release_19_02.rst | 5 +++ drivers/crypto/aesni_gcm/aesni_gcm_pmd.c | 7 ++-- drivers/crypto/aesni_gcm/aesni_gcm_pmd_ops.c | 5 +-- drivers/crypto/aesni_gcm/aesni_gcm_pmd_private.h | 2 ++ drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c | 7 ++-- drivers/crypto/aesni_mb/rte_aesni_mb_pmd_ops.c | 5 +-- drivers/crypto/aesni_mb/rte_aesni_mb_pmd_private.h | 2 ++ drivers/crypto/armv8/rte_armv8_pmd.c | 7 ++-- drivers/crypto/armv8/rte_armv8_pmd_ops.c | 5 +-- drivers/crypto/armv8/rte_armv8_pmd_private.h | 2 ++ drivers/crypto/caam_jr/caam_jr.c | 3 +- drivers/crypto/ccp/ccp_pmd_ops.c | 5 +-- drivers/crypto/ccp/ccp_pmd_private.h | 2 ++ drivers/crypto/ccp/rte_ccp_pmd.c | 9 +- drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c| 3 +- drivers/crypto/dpaa_sec/dpaa_sec.c | 3 +- drivers/crypto/kasumi/rte_kasumi_pmd.c | 7 ++-- drivers/crypto/kasumi/rte_kasumi_pmd_ops.c | 5 +-- drivers/crypto/kasumi/rte_kasumi_pmd_private.h | 2 ++ drivers/crypto/mvsam/rte_mrvl_pmd_ops.c| 5 +-- drivers/crypto/mvsam/rte_mrvl_pmd_private.h| 3 ++ drivers/crypto/null/null_crypto_pmd.c | 5 +-- drivers/crypto/null/null_crypto_pmd_ops.c | 5 +-- drivers/crypto/null/null_crypto_pmd_private.h | 2 ++ drivers/crypto/octeontx/otx_cryptodev_ops.c| 3 +- drivers/crypto/openssl/rte_openssl_pmd.c | 7 ++-- drivers/crypto/openssl/rte_openssl_pmd_ops.c | 5 +-- drivers/crypto/openssl/rte_openssl_pmd_private.h | 2 ++ drivers/crypto/qat/qat_sym_pmd.c | 2 +- drivers/crypto/scheduler/scheduler_pmd_ops.c | 5 ++- drivers/crypto/snow3g/rte_snow3g_pmd.c | 7 ++-- drivers/crypto/snow3g/rte_snow3g_pmd_ops.c | 5 +-- drivers/crypto/snow3g/rte_snow3g_pmd_private.h | 2 ++ drivers/crypto/virtio/virtio_cryptodev.c | 6 ++-- drivers/crypto/zuc/rte_zuc_pmd.c | 7 ++-- drivers/crypto/zuc/rte_zuc_pmd_ops.c | 5 +-- drivers/crypto/zuc/rte_zuc_pmd_private.h | 2 ++ drivers/net/softnic/rte_eth_softnic_cryptodev.c| 2 +- examples/fips_validation/main.c| 4 +-- examples/ip_pipeline/cryptodev.c | 2 +- examples/ipsec-secgw/ipsec-secgw.c | 7 ++-- examples/l2fwd-crypto/main.c | 4 ++- examples/vhost_crypto/main.c | 9 -- lib/librte_cryptodev/rte_cryptodev.c | 5 ++- lib/librte_cryptodev/rte_cryptodev.h | 7 ++-- lib/librte_cryptodev/rte_cryptodev_pmd.h | 3 +- test/test/test_cryptodev.c | 37 +- test/test/test_cryptodev_asym.c| 8 ++--- test/test/test_event_crypto_adapter.c | 5 +-- 50 files changed, 156 insertions(+), 107 deletions(-) diff --git a/app/test-crypto-perf/main.c b/app/test-crypto-perf/main.c index 953e058c9..38a2e429f 100644 --- a/app/test-crypto-perf/main.c +++ b/app/test-crypto-perf/main.c @@ -218,6 +218,9 @@ cperf_initialize_cryptodev(struct cperf_options *opts, uint8_t *enabled_cdevs, session_pool_socket[socket_id] = sess_mp; } + qp_conf.mp_session = session_pool_socket[socket_id]; + qp_conf.mp_session_private = session_pool_socket[socket_id]; + ret = rte_cryptodev_configure(cdev_id, &conf); if (ret < 0) { printf("Failed to configure cryptodev %u", cdev_id); @@ -226,8 +229,7 @@ cperf_initialize_cryptodev(struct cperf_options *opts, uint8_t *enabled_cdevs, for (j = 0; j < opts->nb_qps; j++) { ret = rte_cryptodev_queue_pair_setup(cdev_id, j, - &qp_conf, socket_id, - session_pool_socket[socket_id]); + &qp_conf, socket_id); if (ret < 0) { printf("Failed to setup queue pair %u on " "cryptodev %u", j, cdev_id); diff --git a/doc/guides/rel_notes/release_19_02.rst b/doc/guides/rel_notes/release_19_02.rst index 47768288a..4420c2441 100644 --- a/doc/guides/rel_notes/release_19_02.rst +++ b/doc/guides/rel_notes/release_19_02.rst @@ -130,6 +130,11 @@ API Changes ``rte_pdump_init()`` and enum ``rte_pdump_socktype`` were deprecated since 18.05 and are removed in this release. +* cryptodev: as shown in the the 18.08 deprecation notice
[dpdk-dev] [PATCH v3 2/2] cryptodev: change symmetric session structure
This patch changes the symmetric session structure of cryptodev. The symmetric session now contains extra information for secure access purposes. The patch also includes the updates to the PMDs, test applications, and examples to fit the change. Signed-off-by: Fan Zhang --- app/test-crypto-perf/cperf.h | 1 + app/test-crypto-perf/cperf_ops.c | 11 +- app/test-crypto-perf/cperf_ops.h | 2 +- app/test-crypto-perf/cperf_test_latency.c| 5 +- app/test-crypto-perf/cperf_test_latency.h| 1 + app/test-crypto-perf/cperf_test_pmd_cyclecount.c | 5 +- app/test-crypto-perf/cperf_test_pmd_cyclecount.h | 1 + app/test-crypto-perf/cperf_test_throughput.c | 5 +- app/test-crypto-perf/cperf_test_throughput.h | 1 + app/test-crypto-perf/cperf_test_verify.c | 5 +- app/test-crypto-perf/cperf_test_verify.h | 1 + app/test-crypto-perf/main.c | 103 ++ doc/guides/rel_notes/release_19_02.rst | 14 ++ drivers/crypto/aesni_gcm/Makefile| 1 + drivers/crypto/aesni_gcm/aesni_gcm_pmd.c | 3 +- drivers/crypto/aesni_gcm/meson.build | 1 + drivers/crypto/aesni_mb/Makefile | 1 + drivers/crypto/aesni_mb/meson.build | 2 + drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c | 3 +- drivers/crypto/armv8/Makefile| 1 + drivers/crypto/armv8/rte_armv8_pmd.c | 3 +- drivers/crypto/kasumi/Makefile | 1 + drivers/crypto/kasumi/meson.build| 1 + drivers/crypto/kasumi/rte_kasumi_pmd.c | 3 +- drivers/crypto/openssl/Makefile | 1 + drivers/crypto/openssl/meson.build | 1 + drivers/crypto/openssl/rte_openssl_pmd.c | 3 +- drivers/crypto/snow3g/Makefile | 1 + drivers/crypto/snow3g/rte_snow3g_pmd.c | 3 +- drivers/crypto/zuc/Makefile | 1 + drivers/crypto/zuc/meson.build | 1 + drivers/crypto/zuc/rte_zuc_pmd.c | 3 +- drivers/net/softnic/rte_eth_softnic_cli.c| 52 - drivers/net/softnic/rte_eth_softnic_cryptodev.c | 50 - drivers/net/softnic/rte_eth_softnic_internals.h | 3 + examples/fips_validation/main.c | 34 +++- examples/ip_pipeline/cli.c | 49 +++-- examples/ip_pipeline/cryptodev.c | 49 - examples/ip_pipeline/cryptodev.h | 3 + examples/ip_pipeline/examples/flow_crypto.cli| 9 +- examples/ipsec-secgw/ipsec-secgw.c | 48 +++-- examples/ipsec-secgw/ipsec.c | 2 +- examples/ipsec-secgw/ipsec.h | 2 + examples/l2fwd-crypto/main.c | 66 --- examples/vhost_crypto/main.c | 13 +- lib/librte_cryptodev/Makefile| 1 + lib/librte_cryptodev/meson.build | 1 + lib/librte_cryptodev/rte_cryptodev.c | 190 +++ lib/librte_cryptodev/rte_cryptodev.h | 65 ++- lib/librte_cryptodev/rte_cryptodev_pmd.h | 13 +- lib/librte_cryptodev/rte_cryptodev_version.map | 2 + lib/librte_vhost/rte_vhost_crypto.h | 9 +- lib/librte_vhost/vhost_crypto.c | 8 +- test/test/test_cryptodev.c | 229 ++- test/test/test_cryptodev_blockcipher.c | 7 +- test/test/test_cryptodev_blockcipher.h | 1 + test/test/test_event_crypto_adapter.c| 32 +++- 57 files changed, 845 insertions(+), 282 deletions(-) diff --git a/app/test-crypto-perf/cperf.h b/app/test-crypto-perf/cperf.h index db58228dc..2b0aad095 100644 --- a/app/test-crypto-perf/cperf.h +++ b/app/test-crypto-perf/cperf.h @@ -15,6 +15,7 @@ struct cperf_op_fns; typedef void *(*cperf_constructor_t)( struct rte_mempool *sess_mp, + struct rte_mempool *sess_priv_mp, uint8_t dev_id, uint16_t qp_id, const struct cperf_options *options, diff --git a/app/test-crypto-perf/cperf_ops.c b/app/test-crypto-perf/cperf_ops.c index 44808f50a..f59568b80 100644 --- a/app/test-crypto-perf/cperf_ops.c +++ b/app/test-crypto-perf/cperf_ops.c @@ -469,6 +469,7 @@ cperf_set_ops_aead(struct rte_crypto_op **ops, static struct rte_cryptodev_sym_session * cperf_create_session(struct rte_mempool *sess_mp, + struct rte_mempool *priv_mp, uint8_t dev_id, const struct cperf_options *options, const struct cperf_test_vector *test_vector, @@ -505,7 +506,7 @@ cperf_create_session(struct rte_mempool *sess_mp, } /* create crypto session */ rte_cryptodev_sym_session_init(dev_id, sess, &cipher_xform, - sess_mp); +
Re: [dpdk-dev] [PATCH v4 1/9] examples/ipsec-secgw: avoid to request unused TX offloads
Hi Konstantin, On 12/14/2018 10:10 PM, Konstantin Ananyev wrote: > ipsec-secgw always enables TX offloads > (DEV_TX_OFFLOAD_MULTI_SEGS, DEV_TX_OFFLOAD_SECURITY), > even when they are not requested by the config. > That causes many PMD to choose full-featured TX function, > which in many cases is much slower then one without offloads. > That patch adds checks to enabled extra HW offloads, only when > they were requested. > Plus it enables DEV_TX_OFFLOAD_IPV4_CKSUM, > only when other HW TX ofloads are going to be enabled. > Otherwise SW version of ip cksum calculation is used. > That allows to use vector TX function, when inline-ipsec is not > requested. > > Signed-off-by: Remy Horton > Signed-off-by: Konstantin Ananyev > Acked-by: Radu Nicolau > --- > examples/ipsec-secgw/ipsec-secgw.c | 44 +++ > examples/ipsec-secgw/ipsec.h | 6 > examples/ipsec-secgw/sa.c | 56 ++ > 3 files changed, 91 insertions(+), 15 deletions(-) > > diff --git a/examples/ipsec-secgw/ipsec-secgw.c > b/examples/ipsec-secgw/ipsec-secgw.c > index 1bc0b5b50..cfc2b05e5 100644 > --- a/examples/ipsec-secgw/ipsec-secgw.c > +++ b/examples/ipsec-secgw/ipsec-secgw.c > @@ -208,8 +208,6 @@ static struct rte_eth_conf port_conf = { > }, > .txmode = { > .mq_mode = ETH_MQ_TX_NONE, > - .offloads = (DEV_TX_OFFLOAD_IPV4_CKSUM | > - DEV_TX_OFFLOAD_MULTI_SEGS), I believe this is disabling checksum offload for all cases and then enabling only for inline crypto and inline proto. This is breaking lookaside proto and lookaside none cases. Please correct me if I am wrong. So a NACK for this if my understanding is correct. > }, > }; > > @@ -315,7 +313,8 @@ prepare_traffic(struct rte_mbuf **pkts, struct > ipsec_traffic *t, > } > > static inline void > -prepare_tx_pkt(struct rte_mbuf *pkt, uint16_t port) > +prepare_tx_pkt(struct rte_mbuf *pkt, uint16_t port, > + const struct lcore_conf *qconf) > { > struct ip *ip; > struct ether_hdr *ethhdr; > @@ -325,14 +324,19 @@ prepare_tx_pkt(struct rte_mbuf *pkt, uint16_t port) > ethhdr = (struct ether_hdr *)rte_pktmbuf_prepend(pkt, ETHER_HDR_LEN); > > if (ip->ip_v == IPVERSION) { > - pkt->ol_flags |= PKT_TX_IP_CKSUM | PKT_TX_IPV4; > + pkt->ol_flags |= qconf->outbound.ipv4_offloads; > pkt->l3_len = sizeof(struct ip); > pkt->l2_len = ETHER_HDR_LEN; > > ip->ip_sum = 0; > + > + /* calculate IPv4 cksum in SW */ > + if ((pkt->ol_flags & PKT_TX_IP_CKSUM) == 0) > + ip->ip_sum = rte_ipv4_cksum((struct ipv4_hdr *)ip); > + > ethhdr->ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4); > } else { > - pkt->ol_flags |= PKT_TX_IPV6; > + pkt->ol_flags |= qconf->outbound.ipv6_offloads; > pkt->l3_len = sizeof(struct ip6_hdr); > pkt->l2_len = ETHER_HDR_LEN; > > @@ -346,18 +350,19 @@ prepare_tx_pkt(struct rte_mbuf *pkt, uint16_t port) > } > > static inline void > -prepare_tx_burst(struct rte_mbuf *pkts[], uint16_t nb_pkts, uint16_t port) > +prepare_tx_burst(struct rte_mbuf *pkts[], uint16_t nb_pkts, uint16_t port, > + const struct lcore_conf *qconf) > { > int32_t i; > const int32_t prefetch_offset = 2; > > for (i = 0; i < (nb_pkts - prefetch_offset); i++) { > rte_mbuf_prefetch_part2(pkts[i + prefetch_offset]); > - prepare_tx_pkt(pkts[i], port); > + prepare_tx_pkt(pkts[i], port, qconf); > } > /* Process left packets */ > for (; i < nb_pkts; i++) > - prepare_tx_pkt(pkts[i], port); > + prepare_tx_pkt(pkts[i], port, qconf); > } > > /* Send burst of packets on an output interface */ > @@ -371,7 +376,7 @@ send_burst(struct lcore_conf *qconf, uint16_t n, uint16_t > port) > queueid = qconf->tx_queue_id[port]; > m_table = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table; > > - prepare_tx_burst(m_table, n, port); > + prepare_tx_burst(m_table, n, port, qconf); > > ret = rte_eth_tx_burst(port, queueid, m_table, n); > if (unlikely(ret < n)) { > @@ -1543,7 +1548,7 @@ cryptodevs_init(void) > } > > static void > -port_init(uint16_t portid) > +port_init(uint16_t portid, uint64_t req_rx_offloads, uint64_t > req_tx_offloads) > { > struct rte_eth_dev_info dev_info; > struct rte_eth_txconf *txconf; > @@ -1584,10 +1589,10 @@ port_init(uint16_t portid) > local_port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME; > } > > - if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_SECURITY) > - local_port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_SECURITY; > - if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_SECURITY) > - local_port_conf.txmode.offloads |= DEV_T
Re: [dpdk-dev] net/failsafe: add default Tx mbuf fast free capability
On 12/21/2018 1:16 PM, Gaëtan Rivet wrote: > Hi Ferruh, Andrew, > > On Fri, Dec 21, 2018 at 03:52:07PM +0300, Andrew Rybchenko wrote: >> Hi Ferruh, >> >> On 12/21/18 3:43 PM, Ferruh Yigit wrote: >>> On 12/21/2018 12:28 PM, Andrew Rybchenko wrote: On 12/21/18 3:12 PM, Ferruh Yigit wrote: > On 10/12/2018 12:36 PM, Andrew Rybchenko wrote: >> From: Ivan Malov >> >> This capability is reported when supported by the current emitting >> sub-device. Failsafe PMD itself does not excercise fast free logic. > I think overlay device capability reporting already discussed a few > times, the > question is if an overlay devices should claim a feature when it depends > on > underlay devices? The capability may be reported by the failsafe since it is transparent from fast free logic point of view. >>> Why it is transparent? If one of the underlying device doesn't support/claim >>> this feature, application can't use this feature with failsafe, isn't it? > > If a VF is forbidden by its PF from adding MAC addresses, the driver > should still advertize support for "Unicast MAC filter" right? > > This is the same here. Fail-safe needs to forward configurations items > to its sub-device for a feature to work. Sometimes, the hardware won't > be sufficient. But the fail-safe itself still has the parts needed (even > if it is only a flag to add to a feature list). I see your point, also I think it may be misleading for an overlay device to announce a feature which is completely depends underlay devices. Anyway this may be a nuance. I am OK with the change after Andrew's explanation, and as far as I understand you are OK too, may I add your explicit ack to the patch? > > It is necessary, at the fail-safe level, to be able to describe the > current feature set. This is what the feature list is for. There are > important caveats to consider however, which is inherent to using the > fail-safe. > > It does not mean those features should be removed from the fail-safe > feature list. > >> >> tx_offload_capa in failsafe is a mask to apply on sub-device capabilities. >> So, if the capability is not supported by any sub-device it will not be >> reported. >> As well if there is the capability bit in the mask, it will not be reported >> regardless >> sub-devices capabilities. The description for the patch above tries to >> explain it - >> it looks like not that successful. >> > Given that no ack/review given to the patch, I am updating it as rejected. Is it a new policy? I thought that it was vice versa before. >>> Hi Andrew, >>> >>> Yes policy is other-way around indeed, when there is no comment at all >>> default >>> behavior is accept, but please take above paragraph as my comment to the >>> patch. >> >> Got it. >> >>> And I was thinking it is a little controversial and there is no support to >>> have >>> it, so lets don't get it. What do you think? >> >> I see you motivation. >> >> Signed-off-by: Ivan Malov >> Signed-off-by: Andrew Rybchenko >> --- >>doc/guides/nics/features/failsafe.ini | 1 + >>drivers/net/failsafe/failsafe_ops.c | 1 + >>2 files changed, 2 insertions(+) >> >> diff --git a/doc/guides/nics/features/failsafe.ini >> b/doc/guides/nics/features/failsafe.ini >> index e3c4c08f2..b6f3dcee6 100644 >> --- a/doc/guides/nics/features/failsafe.ini >> +++ b/doc/guides/nics/features/failsafe.ini >> @@ -7,6 +7,7 @@ >>Link status = Y >>Link status event= Y >>Rx interrupt = Y >> +Fast mbuf free = Y >>Queue start/stop = Y >>Runtime Rx queue setup = Y >>Runtime Tx queue setup = Y >> diff --git a/drivers/net/failsafe/failsafe_ops.c >> b/drivers/net/failsafe/failsafe_ops.c >> index 7f8bcd4c6..e3add404b 100644 >> --- a/drivers/net/failsafe/failsafe_ops.c >> +++ b/drivers/net/failsafe/failsafe_ops.c >> @@ -78,6 +78,7 @@ static struct rte_eth_dev_info default_infos = { >> DEV_RX_OFFLOAD_SECURITY, >> .tx_offload_capa = >> DEV_TX_OFFLOAD_MULTI_SEGS | >> +DEV_TX_OFFLOAD_MBUF_FAST_FREE | >> DEV_TX_OFFLOAD_IPV4_CKSUM | >> DEV_TX_OFFLOAD_UDP_CKSUM | >> DEV_TX_OFFLOAD_TCP_CKSUM | >> >> >
Re: [dpdk-dev] [PATCH v4 2/9] examples/ipsec-secgw: allow to specify neighbor mac address
Hi, On 12/14/2018 10:10 PM, Konstantin Ananyev wrote: > In some cases it is useful to allow user to specify destination > ether address for outgoing packets. > This patch adds such ability by introducing new 'neigh' config > file option. I do not see an example config file option about how to use this option and no update is done to documentation for this newly added option. > Signed-off-by: Konstantin Ananyev > Acked-by: Radu Nicolau >
Re: [dpdk-dev] [dpdk-stable] [PATCH ] net/avf/base: fix comment referencing internal data
On 12/21/2018 12:16 PM, Rami Rosen wrote: > DCR is Intel internal information, no need to be in public code. > See also a parallel patch by Ferruh, commit > 1a0833efde70 ("net/i40e/base: fix comment referencing internal data"). > > Fixes: e5b2a9e957e7 ("net/avf/base: add base code for avf PMD") > Cc: sta...@dpdk.org > > Signed-off-by: Rami Rosen Reviewed-by: Ferruh Yigit Applied to dpdk-next-net/master, thanks.
[dpdk-dev] [PATCH] cryptodev: fix PMD memory leak
This patch fixes the memory leak during queue pair release. Originally the operation ring is not freed when releasing queue pair, cause the next queue_pair configure call fail and memory leak. Fixes: eec136f3c54f ("aesni_gcm: add driver for AES-GCM crypto operations") Fixes: cf7685d68f00 ("crypto/zuc: add driver for ZUC library") Fixes: d61f70b4c918 ("crypto/libcrypto: add driver for OpenSSL library") Fixes: 3aafc423cf4d ("snow3g: add driver for SNOW 3G library") Cc: sta...@dpdk.org Signed-off-by: Fan Zhang --- drivers/crypto/aesni_gcm/aesni_gcm_pmd_ops.c | 5 + drivers/crypto/openssl/rte_openssl_pmd_ops.c | 5 + drivers/crypto/snow3g/rte_snow3g_pmd_ops.c | 5 + drivers/crypto/zuc/rte_zuc_pmd_ops.c | 5 + 4 files changed, 20 insertions(+) diff --git a/drivers/crypto/aesni_gcm/aesni_gcm_pmd_ops.c b/drivers/crypto/aesni_gcm/aesni_gcm_pmd_ops.c index 2f70f2a1a..bf69dde54 100644 --- a/drivers/crypto/aesni_gcm/aesni_gcm_pmd_ops.c +++ b/drivers/crypto/aesni_gcm/aesni_gcm_pmd_ops.c @@ -153,6 +153,11 @@ static int aesni_gcm_pmd_qp_release(struct rte_cryptodev *dev, uint16_t qp_id) { if (dev->data->queue_pairs[qp_id] != NULL) { + struct aesni_gcm_qp *qp = dev->data->queue_pairs[qp_id]; + + if (qp->processed_pkts) + rte_ring_free(qp->processed_pkts); + rte_free(dev->data->queue_pairs[qp_id]); dev->data->queue_pairs[qp_id] = NULL; } diff --git a/drivers/crypto/openssl/rte_openssl_pmd_ops.c b/drivers/crypto/openssl/rte_openssl_pmd_ops.c index 5644f593a..40217cf0d 100644 --- a/drivers/crypto/openssl/rte_openssl_pmd_ops.c +++ b/drivers/crypto/openssl/rte_openssl_pmd_ops.c @@ -657,6 +657,11 @@ static int openssl_pmd_qp_release(struct rte_cryptodev *dev, uint16_t qp_id) { if (dev->data->queue_pairs[qp_id] != NULL) { + struct openssl_qp *qp = dev->data->queue_pairs[qp_id]; + + if (qp->processed_ops) + rte_ring_free(qp->processed_ops); + rte_free(dev->data->queue_pairs[qp_id]); dev->data->queue_pairs[qp_id] = NULL; } diff --git a/drivers/crypto/snow3g/rte_snow3g_pmd_ops.c b/drivers/crypto/snow3g/rte_snow3g_pmd_ops.c index fad483c75..d2125233f 100644 --- a/drivers/crypto/snow3g/rte_snow3g_pmd_ops.c +++ b/drivers/crypto/snow3g/rte_snow3g_pmd_ops.c @@ -142,6 +142,11 @@ static int snow3g_pmd_qp_release(struct rte_cryptodev *dev, uint16_t qp_id) { if (dev->data->queue_pairs[qp_id] != NULL) { + struct snow3g_qp *qp = dev->data->queue_pairs[qp_id]; + + if (qp->processed_ops) + rte_ring_free(qp->processed_ops); + rte_free(dev->data->queue_pairs[qp_id]); dev->data->queue_pairs[qp_id] = NULL; } diff --git a/drivers/crypto/zuc/rte_zuc_pmd_ops.c b/drivers/crypto/zuc/rte_zuc_pmd_ops.c index 7bd985fc1..1b88947eb 100644 --- a/drivers/crypto/zuc/rte_zuc_pmd_ops.c +++ b/drivers/crypto/zuc/rte_zuc_pmd_ops.c @@ -142,6 +142,11 @@ static int zuc_pmd_qp_release(struct rte_cryptodev *dev, uint16_t qp_id) { if (dev->data->queue_pairs[qp_id] != NULL) { + struct zuc_qp *qp = dev->data->queue_pairs[qp_id]; + + if (qp->processed_ops) + rte_ring_free(qp->processed_ops); + rte_free(dev->data->queue_pairs[qp_id]); dev->data->queue_pairs[qp_id] = NULL; } -- 2.13.6
Re: [dpdk-dev] [PATCH v4 3/9] examples/ipsec-secgw: fix crypto-op might never get dequeued
On 12/14/2018 10:10 PM, Konstantin Ananyev wrote: > In some cases crypto-ops could never be dequeued from the crypto-device. > The easiest way to reproduce: > start ipsec-secgw with crypto-dev and send to it less then 32 packets. > none packets will be forwarded. > Reason for that is that the application does dequeue() from crypto-queues > only when new packets arrive. > This patch makes sure it calls dequeue() on a regular basis. > > Fixes: c64278c0c18b ("examples/ipsec-secgw: rework processing loop") Thanks for looking into this age long issue of ipsec-secgw. But wouldn't this cause packet reordering, and the packets which are somehow left in the queue will get delayed and would be dropped subsequently due to anti-replay late? > Signed-off-by: Konstantin Ananyev > Acked-by: Radu Nicolau > --- >
Re: [dpdk-dev] [dpdk-stable] [PATCH] app/testpmd: fixed get RSS conf
On 12/21/2018 1:01 PM, Qiming Yang wrote: > I40e do not allow to get rss hena only, need to get rss > key meanwhile. i40e_get_rss_key() returns error if 'rss_conf.rss_key' is NULL, this patch is fixing the error case. But instead of fixing this in application level, it can be better to fix in driver level. Because 'rte_eth_dev_rss_hash_conf_get()' API doesn't dictate 'rss_conf.rss_key' to be not NULL, so any other application can cause same problem. Possible solution can be to have an interim 'key' buffer in 'i40e_dev_rss_hash_conf_get()' and copy it to 'rss_conf.rss_key' if it is not NULL. > > Fixes: 16321de09396 ("ethdev: allow to get RSS hash functions and key") > Cc: sta...@dpdk.org > > Signed-off-by: Qiming Yang > --- > app/test-pmd/config.c | 5 +++-- > 1 file changed, 3 insertions(+), 2 deletions(-) > > diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c > index b9e5dd9..482c4f5 100644 > --- a/app/test-pmd/config.c > +++ b/app/test-pmd/config.c > @@ -1764,8 +1764,7 @@ port_rss_hash_conf_show(portid_t port_id, int > show_rss_key) > return; > } > > - /* Get RSS hash key if asked to display it */ > - rss_conf.rss_key = (show_rss_key) ? rss_key : NULL; > + rss_conf.rss_key = rss_key; > rss_conf.rss_key_len = hash_key_size; > diag = rte_eth_dev_rss_hash_conf_get(port_id, &rss_conf); > if (diag != 0) { > @@ -1793,6 +1792,8 @@ port_rss_hash_conf_show(portid_t port_id, int > show_rss_key) > printf("%s ", rss_type_table[i].str); > } > printf("\n"); > + > + /* Get RSS hash key if asked to display it */ > if (!show_rss_key) > return; > printf("RSS key:\n"); >
Re: [dpdk-dev] [PATCH v4 4/9] examples/ipsec-secgw: fix outbound codepath for single SA
On 12/14/2018 10:10 PM, Konstantin Ananyev wrote: > Looking at process_pkts_outbound_nosp() there seems few issues: > - accessing mbuf after it was freed > - invoking ipsec_outbound() for ipv4 packets only > - copying number of packets, but not the mbuf pointers itself > > that patch provides fixes for that issues. > > Fixes: 906257e965b7 ("examples/ipsec-secgw: support IPv6") > > Signed-off-by: Konstantin Ananyev > Acked-by: Radu Nicolau > --- > examples/ipsec-secgw/ipsec-secgw.c | 32 -- > 1 file changed, 22 insertions(+), 10 deletions(-) > > diff --git a/examples/ipsec-secgw/ipsec-secgw.c > b/examples/ipsec-secgw/ipsec-secgw.c > index 62443172a..d1da2d5ce 100644 > --- a/examples/ipsec-secgw/ipsec-secgw.c > +++ b/examples/ipsec-secgw/ipsec-secgw.c > @@ -616,32 +616,44 @@ process_pkts_outbound_nosp(struct ipsec_ctx *ipsec_ctx, > struct ipsec_traffic *traffic) > { > struct rte_mbuf *m; > - uint32_t nb_pkts_out, i; > + uint32_t nb_pkts_out, i, n; > struct ip *ip; > > /* Drop any IPsec traffic from protected ports */ > for (i = 0; i < traffic->ipsec.num; i++) > rte_pktmbuf_free(traffic->ipsec.pkts[i]); > > - traffic->ipsec.num = 0; > + n = 0; > > - for (i = 0; i < traffic->ip4.num; i++) > - traffic->ip4.res[i] = single_sa_idx; > + for (i = 0; i < traffic->ip4.num; i++) { > + traffic->ipsec.pkts[n] = traffic->ip4.pkts[i]; > + traffic->ipsec.res[n++] = single_sa_idx; > + } > > - for (i = 0; i < traffic->ip6.num; i++) > - traffic->ip6.res[i] = single_sa_idx; > + for (i = 0; i < traffic->ip6.num; i++) { > + traffic->ipsec.pkts[n] = traffic->ip6.pkts[i]; > + traffic->ipsec.res[n++] = single_sa_idx; > + } > + > + traffic->ip4.num = 0; > + traffic->ip6.num = 0; > + traffic->ipsec.num = n; > > - nb_pkts_out = ipsec_outbound(ipsec_ctx, traffic->ip4.pkts, > - traffic->ip4.res, traffic->ip4.num, > + nb_pkts_out = ipsec_outbound(ipsec_ctx, traffic->ipsec.pkts, > + traffic->ipsec.res, traffic->ipsec.num, > MAX_PKT_BURST); > > /* They all sue the same SA (ip4 or ip6 tunnel) */ > m = traffic->ipsec.pkts[i]; > ip = rte_pktmbuf_mtod(m, struct ip *); > - if (ip->ip_v == IPVERSION) > + if (ip->ip_v == IPVERSION) { > traffic->ip4.num = nb_pkts_out; > - else > + for (i = 0; i < nb_pkts_out; i++) > + traffic->ip4.pkts[i] = traffic->ipsec.pkts[i]; > + } else { > traffic->ip6.num = nb_pkts_out; > + traffic->ip6.pkts[i] = traffic->ipsec.pkts[i]; you don't need a for loop here?? > + } > } > > static inline int32_t
Re: [dpdk-dev] [PATCH v4 06/10] ipsec: implement SA data-path API
> >>> + */ > >>> + > >>> +/* > >>> + * Move preceding (L3) headers down to remove ESP header and IV. > >>> + */ > >> why cant we use rte_mbuf APIs to append/prepend/trim/adjust lengths. > > We do use rte_mbuf append/trim, etc. adjust mbuf's data_ofs and data_len. > > But apart from that for transport mode we have to move actual packet > > headers. > > Let say for inbound we have to get rid of ESP header (which is after IP > > header), > > but preserve IP header, so we moving L2/L3 headers down, overwriting ESP > > header. > ok got your point > >> I believe these adjustments are happening in the mbuf itself. > >> Moreover these APIs are not specific to esp headers. > > I didn't get your last sentence: that function is used to remove esp header > > (see above) - that's why I named it that way. > These can be used to remove any header and not specifically esp. So this > API could be generic in rte_mbuf. That function has nothing to do with mbuf in general. It just copies bytes between overlapping in certain way buffers (src.start < dst.start < src.end < dst.end). Right now it is very primitive - copies on byte at a time in descending order. Wrote it just to avoid using memmove(). I don't think there is any point to have such dummy function in the lib/eal. > > > >>> +static inline void > >>> +remove_esph(char *np, char *op, uint32_t hlen) > >>> +{ > >>> + uint32_t i; > >>> + > >>> + for (i = hlen; i-- != 0; np[i] = op[i]) > >>> + ; > >>> +} > >>> + > >>> +/* > > > >>> + > >>> +/* update original and new ip header fields for tunnel case */ > >>> +static inline void > >>> +update_tun_l3hdr(const struct rte_ipsec_sa *sa, void *p, uint32_t plen, > >>> + uint32_t l2len, rte_be16_t pid) > >>> +{ > >>> + struct ipv4_hdr *v4h; > >>> + struct ipv6_hdr *v6h; > >>> + > >>> + if (sa->type & RTE_IPSEC_SATP_MODE_TUNLV4) { > >>> + v4h = p; > >>> + v4h->packet_id = pid; > >>> + v4h->total_length = rte_cpu_to_be_16(plen - l2len); > >> where are we updating the rest of the fields, like ttl, checksum, ip > >> addresses, etc > > TTL, ip addresses and other fileds supposed to be setuped by user > > and provided via rte_ipsec_sa_init(): > > struct rte_ipsec_sa_prm.tun.hdr should contain prepared template > > for L3(and L2 if user wants to) header. > > Checksum calculation is not done inside the lib right now - > > it is a user responsibility to caclucate/set it after librte_ipsec > > finishes processing the packet. > I believe static fields are updated during sa init but some fields like > ttl and checksum, > can be updated in the library itself which is updated for every packet. > (https://tools.ietf.org/html/rfc1624) About checksum - there is no point to calculate cksum it in the lib, as user may choose to use HW chksum offload. All other libraries ip_frag, GSO, etc. leave it to the user, I don't see why ipsec should be different here. About TTL and other fields - I suppose you refer to: https://tools.ietf.org/html/rfc4301#section-5.1.2 Header Construction for Tunnel Mode right? Surely that has to be supported, one way or the other, but we don't plan to implement it in 19.02. Current plan to add it in 19.05, if time permits. > > > >>> + } else { > >>> + v6h = p; > >>> + v6h->payload_len = rte_cpu_to_be_16(plen - l2len - > >>> + sizeof(*v6h)); > >>> + } > >>> +} > >>> + > >>> +#endif /* _IPH_H_ */ > >>> diff --git a/lib/librte_ipsec/ipsec_sqn.h b/lib/librte_ipsec/ipsec_sqn.h > >>> index 1935f6e30..6e18c34eb 100644 > >>> --- a/lib/librte_ipsec/ipsec_sqn.h > >>> +++ b/lib/librte_ipsec/ipsec_sqn.h > >>> @@ -15,6 +15,45 @@ > >>> > >>>#define IS_ESN(sa) ((sa)->sqn_mask == UINT64_MAX) > >>> > >>> +/* > >>> + * gets SQN.hi32 bits, SQN supposed to be in network byte order. > >>> + */ > >>> +static inline rte_be32_t > >>> +sqn_hi32(rte_be64_t sqn) > >>> +{ > >>> +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN > >>> + return (sqn >> 32); > >>> +#else > >>> + return sqn; > >>> +#endif > >>> +} > >>> + > >>> +/* > >>> + * gets SQN.low32 bits, SQN supposed to be in network byte order. > >>> + */ > >>> +static inline rte_be32_t > >>> +sqn_low32(rte_be64_t sqn) > >>> +{ > >>> +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN > >>> + return sqn; > >>> +#else > >>> + return (sqn >> 32); > >>> +#endif > >>> +} > >>> + > >>> +/* > >>> + * gets SQN.low16 bits, SQN supposed to be in network byte order. > >>> + */ > >>> +static inline rte_be16_t > >>> +sqn_low16(rte_be64_t sqn) > >>> +{ > >>> +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN > >>> + return sqn; > >>> +#else > >>> + return (sqn >> 48); > >>> +#endif > >>> +} > >>> + > >> shouldn't we move these seq number APIs in rte_esp.h and make them generic > > It could be done, but who will use them except librte_ipsec? > Whoever uses rte_esp.h and not use ipsec lib. The intent of rte_esp.h is > just for that only, otherwise we don't need rte_esp.h, we can have the > content of rte_esp.h in ipsec itself. Again these functions are used
Re: [dpdk-dev] [PATCH] kni: fix build on RHEL 7.6-ALT on IBM POWER9
On 12/21/2018 6:03 AM, David Zeng wrote: > Signed-off-by: David Zeng > --- > kernel/linux/kni/compat.h | 8 +++- > 1 file changed, 7 insertions(+), 1 deletion(-) > > diff --git a/kernel/linux/kni/compat.h b/kernel/linux/kni/compat.h > index 5aadebb..1afa3b8 100644 > --- a/kernel/linux/kni/compat.h > +++ b/kernel/linux/kni/compat.h > @@ -102,8 +102,14 @@ > #undef NET_NAME_UNKNOWN > #endif > > +/* > + * RHEL has two different version with different kernel version: > + * 3.10 is for AMD, Intel, IBM POWER7 and POWER8; > + * 4.14 is for ARM and IBM POWER9 > + */ > #if (defined(RHEL_RELEASE_CODE) && \ > - (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 5))) > + (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 5)) && \ > + (LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0))) > #define ndo_change_mtu ndo_change_mtu_rh74 > #endif Do you need to update 'kernel/linux/kni/ethtool/igb/kcompat.h' for same check? Also there is another update on same line for RHEL8 fix [1], if you can rebase on top of it makes maintainers life easier. [1] https://patches.dpdk.org/patch/49104/ Thanks, ferruh
Re: [dpdk-dev] [RFC 00/14] prefix network structures
> On Dec 20, 2018, at 5:48 PM, Stephen Hemminger > wrote: > > On Thu, 20 Dec 2018 21:59:37 + > Ferruh Yigit wrote: > >> On 10/24/2018 9:18 AM, olivier.matz at 6wind.com (Olivier Matz) wrote: >>> This RFC targets 19.02. >>> >>> The rte_net headers conflict with the libc headers, because >>> some definitions are duplicated, sometimes with few differences. >>> This was discussed in [1], and more recently at the techboard. >>> >>> Before sending the deprecation notice (target for this is 18.11), >>> here is a draft that can be discussed. >>> >>> This RFC adds the rte_ (or RTE_) prefix to all structures, functions >>> and defines in rte_net library. This is a big changeset, that will >>> break the API of many functions, but not the ABI. >>> >>> One question I'm asking is how can we manage the transition. >>> Initially, I hoped it was possible to have a compat layer during >>> one release (supporting both prefixed and unprefixed names), but >>> now that the patch is done, it seems the impact is too big, and >>> impacts too many libraries. >>> >>> Few examples: >>> - rte_eth_macaddr_get/add/remove() use a (struct rte_ether_addr *) >>> - many rte_flow structures use the rte_ prefixed net structures >>> - the mac field of virtio_net structure is rte_ether_addr >>> - the first arg of rte_thash_load_v6_addrs is (struct rte_ipv6_hdr *) >>> ... >>> >>> Therefore, it is clear that doing this would break the compilation >>> of many external applications. >>> >>> Another drawback we need to take in account: it will make the >>> backport of patches more difficult, although this is something >>> that could be tempered by a script. >>> >>> While it is obviously better to have a good namespace convention, >>> we need to identify the issues we have today before deciding it's >>> worth doing the change. >>> >>> Comments? >> >> Is there an consensus about the patchset? There was a decision on techboard >> to >> go with this change (adding rte_ prefix) [1]. >> >> >> This is something that will affect DPDK consumers. Since many APIs are >> changing >> most probably will break API compatibility for many libraries. >> >> Meanwhile the conflict with the libc headers mentioned a few times in the >> past, >> this is something we need to fix. >> >> There are a few comments reluctant to this big modification, but what I >> understand from Olivier's response both using BSD defines or having >> compatibility headers in DPDK won't solve the problem completely. >> >> And assuming we will continue with this solution, another question is do we >> still want to push in 19.02 scope? (And from process point of view I think a >> deprecation notice is not merged for this change in 18.11 scope.) >> >> With the prediction of 19.05 will be big and already break API/ABI for some >> libraries, can we push this into 19.05 as an early merge into repo? >> >> And I think this patch will affect LTS releases and will break auto >> backporting >> for many fixes because it touches many places, so pushing this change even to >> next LTS (19.11) can be an option. >> >> >> Olivier, Thomas, >> >> What do you think about postponing this to 19.05 or even 19.11 ? >> >> >> >> [1] >> https://mails.dpdk.org/archives/dev/2018-October/116695.html >> >>> >>> >>> Things that are missing in RFC: >>> - test with FreeBSD >>> - manually fix some indentation issues >>> >>> >>> Olivier Matz (14): >>> net: add rte prefix to arp structures >>> net: add rte prefix to arp defines >>> net: add rte prefix to ether structures >>> net: add rte prefix to ether functions >>> net: add rte prefix to ether defines >>> net: add rte prefix to esp structure >>> net: add rte prefix to gre structure >>> net: add rte prefix to icmp structure >>> net: add rte prefix to icmp defines >>> net: add rte prefix to ip structure >>> net: add rte prefix to ip defines >>> net: add rte prefix to sctp structure >>> net: add rte prefix to tcp structure >>> net: add rte prefix to udp structure >> >> > > Sigh. Another case where DPDK has to reinvent something because > we can't figure out how to do dependent libraries correctly. > I would have rather just using the existing Linux, BSD definitions > and fixing the DPDK code. > > It is probably the only viable compromise, but it adds to long > term maintenance, and breaks DPDK applications. Neither of which > is a good thing. > > Should this be done by marking the old structure deprecated > first? Ideally, spread over three releases: first, tell the users > in the documentation it is coming; second, mark the old structures > as deprecated causing compiler warnings; third, remove the old > definitions. Doing at once is introducing user pain for no gain. +1 Regards, Keith
Re: [dpdk-dev] [PATCH v4 06/10] ipsec: implement SA data-path API
21/12/2018 15:27, Ananyev, Konstantin: > > > >>> + */ > > >>> + > > >>> +/* > > >>> + * Move preceding (L3) headers down to remove ESP header and IV. > > >>> + */ > > >> why cant we use rte_mbuf APIs to append/prepend/trim/adjust lengths. > > > We do use rte_mbuf append/trim, etc. adjust mbuf's data_ofs and data_len. > > > But apart from that for transport mode we have to move actual packet > > > headers. > > > Let say for inbound we have to get rid of ESP header (which is after IP > > > header), > > > but preserve IP header, so we moving L2/L3 headers down, overwriting ESP > > > header. > > ok got your point > > >> I believe these adjustments are happening in the mbuf itself. > > >> Moreover these APIs are not specific to esp headers. > > > I didn't get your last sentence: that function is used to remove esp > > > header > > > (see above) - that's why I named it that way. > > These can be used to remove any header and not specifically esp. So this > > API could be generic in rte_mbuf. > > That function has nothing to do with mbuf in general. > It just copies bytes between overlapping in certain way buffers > (src.start < dst.start < src.end < dst.end). > Right now it is very primitive - copies on byte at a time in > descending order. > Wrote it just to avoid using memmove(). > I don't think there is any point to have such dummy function in the lib/eal. > > > > > > >>> +static inline void > > >>> +remove_esph(char *np, char *op, uint32_t hlen) > > >>> +{ > > >>> + uint32_t i; > > >>> + > > >>> + for (i = hlen; i-- != 0; np[i] = op[i]) > > >>> + ; > > >>> +} > > >>> + > > >>> +/* > > > > > >>> + > > >>> +/* update original and new ip header fields for tunnel case */ > > >>> +static inline void > > >>> +update_tun_l3hdr(const struct rte_ipsec_sa *sa, void *p, uint32_t plen, > > >>> + uint32_t l2len, rte_be16_t pid) > > >>> +{ > > >>> + struct ipv4_hdr *v4h; > > >>> + struct ipv6_hdr *v6h; > > >>> + > > >>> + if (sa->type & RTE_IPSEC_SATP_MODE_TUNLV4) { > > >>> + v4h = p; > > >>> + v4h->packet_id = pid; > > >>> + v4h->total_length = rte_cpu_to_be_16(plen - l2len); > > >> where are we updating the rest of the fields, like ttl, checksum, ip > > >> addresses, etc > > > TTL, ip addresses and other fileds supposed to be setuped by user > > > and provided via rte_ipsec_sa_init(): > > > struct rte_ipsec_sa_prm.tun.hdr should contain prepared template > > > for L3(and L2 if user wants to) header. > > > Checksum calculation is not done inside the lib right now - > > > it is a user responsibility to caclucate/set it after librte_ipsec > > > finishes processing the packet. > > I believe static fields are updated during sa init but some fields like > > ttl and checksum, > > can be updated in the library itself which is updated for every packet. > > (https://tools.ietf.org/html/rfc1624) > > About checksum - there is no point to calculate cksum it in the lib, > as user may choose to use HW chksum offload. > All other libraries ip_frag, GSO, etc. leave it to the user, > I don't see why ipsec should be different here. > About TTL and other fields - I suppose you refer to: > https://tools.ietf.org/html/rfc4301#section-5.1.2 > Header Construction for Tunnel Mode > right? > Surely that has to be supported, one way or the other, > but we don't plan to implement it in 19.02. > Current plan to add it in 19.05, if time permits. > > > > > > >>> + } else { > > >>> + v6h = p; > > >>> + v6h->payload_len = rte_cpu_to_be_16(plen - l2len - > > >>> + sizeof(*v6h)); > > >>> + } > > >>> +} > > >>> + > > >>> +#endif /* _IPH_H_ */ > > >>> diff --git a/lib/librte_ipsec/ipsec_sqn.h b/lib/librte_ipsec/ipsec_sqn.h > > >>> index 1935f6e30..6e18c34eb 100644 > > >>> --- a/lib/librte_ipsec/ipsec_sqn.h > > >>> +++ b/lib/librte_ipsec/ipsec_sqn.h > > >>> @@ -15,6 +15,45 @@ > > >>> > > >>>#define IS_ESN(sa) ((sa)->sqn_mask == UINT64_MAX) > > >>> > > >>> +/* > > >>> + * gets SQN.hi32 bits, SQN supposed to be in network byte order. > > >>> + */ > > >>> +static inline rte_be32_t > > >>> +sqn_hi32(rte_be64_t sqn) > > >>> +{ > > >>> +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN > > >>> + return (sqn >> 32); > > >>> +#else > > >>> + return sqn; > > >>> +#endif > > >>> +} > > >>> + > > >>> +/* > > >>> + * gets SQN.low32 bits, SQN supposed to be in network byte order. > > >>> + */ > > >>> +static inline rte_be32_t > > >>> +sqn_low32(rte_be64_t sqn) > > >>> +{ > > >>> +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN > > >>> + return sqn; > > >>> +#else > > >>> + return (sqn >> 32); > > >>> +#endif > > >>> +} > > >>> + > > >>> +/* > > >>> + * gets SQN.low16 bits, SQN supposed to be in network byte order. > > >>> + */ > > >>> +static inline rte_be16_t > > >>> +sqn_low16(rte_be64_t sqn) > > >>> +{ > > >>> +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN > > >>> + return sqn; > > >>>
Re: [dpdk-dev] net/failsafe: add default Tx mbuf fast free capability
On Fri, Dec 21, 2018 at 01:59:20PM +, Ferruh Yigit wrote: > On 12/21/2018 1:16 PM, Gaëtan Rivet wrote: > > Hi Ferruh, Andrew, > > > > On Fri, Dec 21, 2018 at 03:52:07PM +0300, Andrew Rybchenko wrote: > >> Hi Ferruh, > >> > >> On 12/21/18 3:43 PM, Ferruh Yigit wrote: > >>> On 12/21/2018 12:28 PM, Andrew Rybchenko wrote: > On 12/21/18 3:12 PM, Ferruh Yigit wrote: > > On 10/12/2018 12:36 PM, Andrew Rybchenko wrote: > >> From: Ivan Malov > >> > >> This capability is reported when supported by the current emitting > >> sub-device. Failsafe PMD itself does not excercise fast free logic. > > I think overlay device capability reporting already discussed a few > > times, the > > question is if an overlay devices should claim a feature when it > > depends on > > underlay devices? > The capability may be reported by the failsafe since it is transparent > from > fast free logic point of view. > >>> Why it is transparent? If one of the underlying device doesn't > >>> support/claim > >>> this feature, application can't use this feature with failsafe, isn't it? > > > > If a VF is forbidden by its PF from adding MAC addresses, the driver > > should still advertize support for "Unicast MAC filter" right? > > > > This is the same here. Fail-safe needs to forward configurations items > > to its sub-device for a feature to work. Sometimes, the hardware won't > > be sufficient. But the fail-safe itself still has the parts needed (even > > if it is only a flag to add to a feature list). > > I see your point, also I think it may be misleading for an overlay device to > announce a feature which is completely depends underlay devices. Anyway this > may > be a nuance. > > I am OK with the change after Andrew's explanation, and as far as I understand > you are OK too, may I add your explicit ack to the patch? > Yes the patch is ok for me, thank you Ferruh. > > > > It is necessary, at the fail-safe level, to be able to describe the > > current feature set. This is what the feature list is for. There are > > important caveats to consider however, which is inherent to using the > > fail-safe. > > > > It does not mean those features should be removed from the fail-safe > > feature list. > > > >> > >> tx_offload_capa in failsafe is a mask to apply on sub-device capabilities. > >> So, if the capability is not supported by any sub-device it will not be > >> reported. > >> As well if there is the capability bit in the mask, it will not be reported > >> regardless > >> sub-devices capabilities. The description for the patch above tries to > >> explain it - > >> it looks like not that successful. > >> > > Given that no ack/review given to the patch, I am updating it as > > rejected. > Is it a new policy? I thought that it was vice versa before. > >>> Hi Andrew, > >>> > >>> Yes policy is other-way around indeed, when there is no comment at all > >>> default > >>> behavior is accept, but please take above paragraph as my comment to the > >>> patch. > >> > >> Got it. > >> > >>> And I was thinking it is a little controversial and there is no support > >>> to have > >>> it, so lets don't get it. What do you think? > >> > >> I see you motivation. > >> > >> Signed-off-by: Ivan Malov > >> Signed-off-by: Andrew Rybchenko > >> --- > >>doc/guides/nics/features/failsafe.ini | 1 + > >>drivers/net/failsafe/failsafe_ops.c | 1 + > >>2 files changed, 2 insertions(+) > >> > >> diff --git a/doc/guides/nics/features/failsafe.ini > >> b/doc/guides/nics/features/failsafe.ini > >> index e3c4c08f2..b6f3dcee6 100644 > >> --- a/doc/guides/nics/features/failsafe.ini > >> +++ b/doc/guides/nics/features/failsafe.ini > >> @@ -7,6 +7,7 @@ > >>Link status = Y > >>Link status event= Y > >>Rx interrupt = Y > >> +Fast mbuf free = Y > >>Queue start/stop = Y > >>Runtime Rx queue setup = Y > >>Runtime Tx queue setup = Y > >> diff --git a/drivers/net/failsafe/failsafe_ops.c > >> b/drivers/net/failsafe/failsafe_ops.c > >> index 7f8bcd4c6..e3add404b 100644 > >> --- a/drivers/net/failsafe/failsafe_ops.c > >> +++ b/drivers/net/failsafe/failsafe_ops.c > >> @@ -78,6 +78,7 @@ static struct rte_eth_dev_info default_infos = { > >>DEV_RX_OFFLOAD_SECURITY, > >>.tx_offload_capa = > >>DEV_TX_OFFLOAD_MULTI_SEGS | > >> + DEV_TX_OFFLOAD_MBUF_FAST_FREE | > >>DEV_TX_OFFLOAD_IPV4_CKSUM | > >>DEV_TX_OFFLOAD_UDP_CKSUM | > >>DEV_TX_OFFLOAD_TCP_CKSUM | > >> > >> > > > -- Gaëtan Rivet 6WIND
Re: [dpdk-dev] [PATCH v4 3/9] examples/ipsec-secgw: fix crypto-op might never get dequeued
> -Original Message- > From: Akhil Goyal [mailto:akhil.go...@nxp.com] > Sent: Friday, December 21, 2018 2:13 PM > To: Ananyev, Konstantin ; dev@dpdk.org > Cc: Nicolau, Radu > Subject: Re: [dpdk-dev] [PATCH v4 3/9] examples/ipsec-secgw: fix crypto-op > might never get dequeued > > > > On 12/14/2018 10:10 PM, Konstantin Ananyev wrote: > > In some cases crypto-ops could never be dequeued from the crypto-device. > > The easiest way to reproduce: > > start ipsec-secgw with crypto-dev and send to it less then 32 packets. > > none packets will be forwarded. > > Reason for that is that the application does dequeue() from crypto-queues > > only when new packets arrive. > > This patch makes sure it calls dequeue() on a regular basis. > > > > Fixes: c64278c0c18b ("examples/ipsec-secgw: rework processing loop") > Thanks for looking into this age long issue of ipsec-secgw. But wouldn't > this cause packet reordering, > and the packets which are somehow left in the queue will get delayed and > would be dropped subsequently due to anti-replay late? Could you explain a bit more - how do you think reordering might happen? Now we always processing packets belonging to particular SA on the same crypto-dev. Konstantin
Re: [dpdk-dev] [PATCH v4 06/10] ipsec: implement SA data-path API
On 12/21/2018 7:57 PM, Ananyev, Konstantin wrote: > + */ > + > +/* > + * Move preceding (L3) headers down to remove ESP header and IV. > + */ why cant we use rte_mbuf APIs to append/prepend/trim/adjust lengths. >>> We do use rte_mbuf append/trim, etc. adjust mbuf's data_ofs and data_len. >>> But apart from that for transport mode we have to move actual packet >>> headers. >>> Let say for inbound we have to get rid of ESP header (which is after IP >>> header), >>> but preserve IP header, so we moving L2/L3 headers down, overwriting ESP >>> header. >> ok got your point I believe these adjustments are happening in the mbuf itself. Moreover these APIs are not specific to esp headers. >>> I didn't get your last sentence: that function is used to remove esp header >>> (see above) - that's why I named it that way. >> These can be used to remove any header and not specifically esp. So this >> API could be generic in rte_mbuf. > That function has nothing to do with mbuf in general. > It just copies bytes between overlapping in certain way buffers > (src.start < dst.start < src.end < dst.end). > Right now it is very primitive - copies on byte at a time in > descending order. > Wrote it just to avoid using memmove(). > I don't think there is any point to have such dummy function in the lib/eal. If this is better than memmove, then probably it is a candidate to a function in lib. I think Thomas/ Olivier can better comment on this > > +static inline void > +remove_esph(char *np, char *op, uint32_t hlen) > +{ > + uint32_t i; > + > + for (i = hlen; i-- != 0; np[i] = op[i]) > + ; > +} > + > +/* > + > +/* update original and new ip header fields for tunnel case */ > +static inline void > +update_tun_l3hdr(const struct rte_ipsec_sa *sa, void *p, uint32_t plen, > + uint32_t l2len, rte_be16_t pid) > +{ > + struct ipv4_hdr *v4h; > + struct ipv6_hdr *v6h; > + > + if (sa->type & RTE_IPSEC_SATP_MODE_TUNLV4) { > + v4h = p; > + v4h->packet_id = pid; > + v4h->total_length = rte_cpu_to_be_16(plen - l2len); where are we updating the rest of the fields, like ttl, checksum, ip addresses, etc >>> TTL, ip addresses and other fileds supposed to be setuped by user >>> and provided via rte_ipsec_sa_init(): >>> struct rte_ipsec_sa_prm.tun.hdr should contain prepared template >>> for L3(and L2 if user wants to) header. >>> Checksum calculation is not done inside the lib right now - >>> it is a user responsibility to caclucate/set it after librte_ipsec >>> finishes processing the packet. >> I believe static fields are updated during sa init but some fields like >> ttl and checksum, >> can be updated in the library itself which is updated for every packet. >> (https://tools.ietf.org/html/rfc1624) > About checksum - there is no point to calculate cksum it in the lib, > as user may choose to use HW chksum offload. > All other libraries ip_frag, GSO, etc. leave it to the user, > I don't see why ipsec should be different here. > About TTL and other fields - I suppose you refer to: > https://tools.ietf.org/html/rfc4301#section-5.1.2 > Header Construction for Tunnel Mode > right? > Surely that has to be supported, one way or the other, > but we don't plan to implement it in 19.02. > Current plan to add it in 19.05, if time permits. I am not talking about the outer ip checksum. Sorry the placement of the comment was not quite right. But I do not see that happening. My question is will the function ipip_outbound in ipsec-secgw called from the application or will it be moved inside the library. I believe this should be inside the lib. > + } else { > + v6h = p; > + v6h->payload_len = rte_cpu_to_be_16(plen - l2len - > + sizeof(*v6h)); > + } > +} > + > +#endif /* _IPH_H_ */ > diff --git a/lib/librte_ipsec/ipsec_sqn.h b/lib/librte_ipsec/ipsec_sqn.h > index 1935f6e30..6e18c34eb 100644 > --- a/lib/librte_ipsec/ipsec_sqn.h > +++ b/lib/librte_ipsec/ipsec_sqn.h > @@ -15,6 +15,45 @@ > > #define IS_ESN(sa)((sa)->sqn_mask == UINT64_MAX) > > +/* > + * gets SQN.hi32 bits, SQN supposed to be in network byte order. > + */ > +static inline rte_be32_t > +sqn_hi32(rte_be64_t sqn) > +{ > +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN > + return (sqn >> 32); > +#else > + return sqn; > +#endif > +} > + > +/* > + * gets SQN.low32 bits, SQN supposed to be in network byte order. > + */ > +static inline rte_be32_t > +sqn_low32(rte_be64_t sqn) > +{ > +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN > + return sqn; > +#else > + return (sqn >> 32); > +#endif > +} > + > +/* > + * gets SQN.low16 bits, SQN supposed to be in network byte order. > + */ > +static inline rte_be16_t > +sqn_low1
Re: [dpdk-dev] [PATCH v4 4/9] examples/ipsec-secgw: fix outbound codepath for single SA
> -Original Message- > From: Akhil Goyal [mailto:akhil.go...@nxp.com] > Sent: Friday, December 21, 2018 2:26 PM > To: Ananyev, Konstantin ; dev@dpdk.org > Cc: Nicolau, Radu > Subject: Re: [dpdk-dev] [PATCH v4 4/9] examples/ipsec-secgw: fix outbound > codepath for single SA > > > > On 12/14/2018 10:10 PM, Konstantin Ananyev wrote: > > Looking at process_pkts_outbound_nosp() there seems few issues: > > - accessing mbuf after it was freed > > - invoking ipsec_outbound() for ipv4 packets only > > - copying number of packets, but not the mbuf pointers itself > > > > that patch provides fixes for that issues. > > > > Fixes: 906257e965b7 ("examples/ipsec-secgw: support IPv6") > > > > Signed-off-by: Konstantin Ananyev > > Acked-by: Radu Nicolau > > --- > > examples/ipsec-secgw/ipsec-secgw.c | 32 -- > > 1 file changed, 22 insertions(+), 10 deletions(-) > > > > diff --git a/examples/ipsec-secgw/ipsec-secgw.c > > b/examples/ipsec-secgw/ipsec-secgw.c > > index 62443172a..d1da2d5ce 100644 > > --- a/examples/ipsec-secgw/ipsec-secgw.c > > +++ b/examples/ipsec-secgw/ipsec-secgw.c > > @@ -616,32 +616,44 @@ process_pkts_outbound_nosp(struct ipsec_ctx > > *ipsec_ctx, > > struct ipsec_traffic *traffic) > > { > > struct rte_mbuf *m; > > - uint32_t nb_pkts_out, i; > > + uint32_t nb_pkts_out, i, n; > > struct ip *ip; > > > > /* Drop any IPsec traffic from protected ports */ > > for (i = 0; i < traffic->ipsec.num; i++) > > rte_pktmbuf_free(traffic->ipsec.pkts[i]); > > > > - traffic->ipsec.num = 0; > > + n = 0; > > > > - for (i = 0; i < traffic->ip4.num; i++) > > - traffic->ip4.res[i] = single_sa_idx; > > + for (i = 0; i < traffic->ip4.num; i++) { > > + traffic->ipsec.pkts[n] = traffic->ip4.pkts[i]; > > + traffic->ipsec.res[n++] = single_sa_idx; > > + } > > > > - for (i = 0; i < traffic->ip6.num; i++) > > - traffic->ip6.res[i] = single_sa_idx; > > + for (i = 0; i < traffic->ip6.num; i++) { > > + traffic->ipsec.pkts[n] = traffic->ip6.pkts[i]; > > + traffic->ipsec.res[n++] = single_sa_idx; > > + } > > + > > + traffic->ip4.num = 0; > > + traffic->ip6.num = 0; > > + traffic->ipsec.num = n; > > > > - nb_pkts_out = ipsec_outbound(ipsec_ctx, traffic->ip4.pkts, > > - traffic->ip4.res, traffic->ip4.num, > > + nb_pkts_out = ipsec_outbound(ipsec_ctx, traffic->ipsec.pkts, > > + traffic->ipsec.res, traffic->ipsec.num, > > MAX_PKT_BURST); > > > > /* They all sue the same SA (ip4 or ip6 tunnel) */ > > m = traffic->ipsec.pkts[i]; > > ip = rte_pktmbuf_mtod(m, struct ip *); > > - if (ip->ip_v == IPVERSION) > > + if (ip->ip_v == IPVERSION) { > > traffic->ip4.num = nb_pkts_out; > > - else > > + for (i = 0; i < nb_pkts_out; i++) > > + traffic->ip4.pkts[i] = traffic->ipsec.pkts[i]; > > + } else { > > traffic->ip6.num = nb_pkts_out; > > + traffic->ip6.pkts[i] = traffic->ipsec.pkts[i]; > you don't need a for loop here?? > > + } Yep, missed that. Will update. > > } > > > > static inline int32_t
Re: [dpdk-dev] [PATCH v4 3/9] examples/ipsec-secgw: fix crypto-op might never get dequeued
On 12/21/2018 8:19 PM, Ananyev, Konstantin wrote: > >> -Original Message- >> From: Akhil Goyal [mailto:akhil.go...@nxp.com] >> Sent: Friday, December 21, 2018 2:13 PM >> To: Ananyev, Konstantin ; dev@dpdk.org >> Cc: Nicolau, Radu >> Subject: Re: [dpdk-dev] [PATCH v4 3/9] examples/ipsec-secgw: fix crypto-op >> might never get dequeued >> >> >> >> On 12/14/2018 10:10 PM, Konstantin Ananyev wrote: >>> In some cases crypto-ops could never be dequeued from the crypto-device. >>> The easiest way to reproduce: >>> start ipsec-secgw with crypto-dev and send to it less then 32 packets. >>> none packets will be forwarded. >>> Reason for that is that the application does dequeue() from crypto-queues >>> only when new packets arrive. >>> This patch makes sure it calls dequeue() on a regular basis. >>> >>> Fixes: c64278c0c18b ("examples/ipsec-secgw: rework processing loop") >> Thanks for looking into this age long issue of ipsec-secgw. But wouldn't >> this cause packet reordering, >> and the packets which are somehow left in the queue will get delayed and >> would be dropped subsequently due to anti-replay late? > Could you explain a bit more - how do you think reordering might happen? I thought any core can pick the remainder of the packets and can give to any of the cryptodevs. If that is assured, then probably we wont face such issues. > Now we always processing packets belonging to particular SA on the same > crypto-dev. > Konstantin
Re: [dpdk-dev] [PATCH v4 3/9] examples/ipsec-secgw: fix crypto-op might never get dequeued
> -Original Message- > From: Akhil Goyal [mailto:akhil.go...@nxp.com] > Sent: Friday, December 21, 2018 2:57 PM > To: Ananyev, Konstantin ; dev@dpdk.org > Cc: Nicolau, Radu > Subject: Re: [dpdk-dev] [PATCH v4 3/9] examples/ipsec-secgw: fix crypto-op > might never get dequeued > > > > On 12/21/2018 8:19 PM, Ananyev, Konstantin wrote: > > > >> -Original Message- > >> From: Akhil Goyal [mailto:akhil.go...@nxp.com] > >> Sent: Friday, December 21, 2018 2:13 PM > >> To: Ananyev, Konstantin ; dev@dpdk.org > >> Cc: Nicolau, Radu > >> Subject: Re: [dpdk-dev] [PATCH v4 3/9] examples/ipsec-secgw: fix crypto-op > >> might never get dequeued > >> > >> > >> > >> On 12/14/2018 10:10 PM, Konstantin Ananyev wrote: > >>> In some cases crypto-ops could never be dequeued from the crypto-device. > >>> The easiest way to reproduce: > >>> start ipsec-secgw with crypto-dev and send to it less then 32 packets. > >>> none packets will be forwarded. > >>> Reason for that is that the application does dequeue() from crypto-queues > >>> only when new packets arrive. > >>> This patch makes sure it calls dequeue() on a regular basis. > >>> > >>> Fixes: c64278c0c18b ("examples/ipsec-secgw: rework processing loop") > >> Thanks for looking into this age long issue of ipsec-secgw. But wouldn't > >> this cause packet reordering, > >> and the packets which are somehow left in the queue will get delayed and > >> would be dropped subsequently due to anti-replay late? > > Could you explain a bit more - how do you think reordering might happen? > > I thought any core can pick the remainder of the packets and can give to > any of the cryptodevs. No it is not possible with current app design. eqnueue/dequeue from particular crypto-queue is always done by the same core. > If that is assured, then probably we wont face such issues. > > Now we always processing packets belonging to particular SA on the same > > crypto-dev. > > Konstantin
Re: [dpdk-dev] [dpdk-stable] [PATCH v2] app/testpmd: add boundary check in flow commandline
On 12/6/2018 2:38 AM, Wei Zhao wrote: > There is need to add boundary for input number from commandline, > If it beyond the defination, code will return error. > > Signed-off-by: Wei Zhao Reviewed-by: Ferruh Yigit Applied to dpdk-next-net/master, thanks.
Re: [dpdk-dev] net/failsafe: add default Tx mbuf fast free capability
On 12/21/2018 12:12 PM, Ferruh Yigit wrote: > On 10/12/2018 12:36 PM, Andrew Rybchenko wrote: >> From: Ivan Malov >> >> This capability is reported when supported by the current emitting >> sub-device. Failsafe PMD itself does not excercise fast free logic. >> >> Signed-off-by: Ivan Malov >> Signed-off-by: Andrew Rybchenko Acked-by: Gaetan Rivet Applied to dpdk-next-net/master, thanks.
Re: [dpdk-dev] [RFC 00/14] prefix network structures
On 12/21/2018 2:38 PM, Wiles, Keith wrote: > > >> On Dec 20, 2018, at 5:48 PM, Stephen Hemminger >> wrote: >> >> On Thu, 20 Dec 2018 21:59:37 + >> Ferruh Yigit wrote: >> >>> On 10/24/2018 9:18 AM, olivier.matz at 6wind.com (Olivier Matz) wrote: This RFC targets 19.02. The rte_net headers conflict with the libc headers, because some definitions are duplicated, sometimes with few differences. This was discussed in [1], and more recently at the techboard. Before sending the deprecation notice (target for this is 18.11), here is a draft that can be discussed. This RFC adds the rte_ (or RTE_) prefix to all structures, functions and defines in rte_net library. This is a big changeset, that will break the API of many functions, but not the ABI. One question I'm asking is how can we manage the transition. Initially, I hoped it was possible to have a compat layer during one release (supporting both prefixed and unprefixed names), but now that the patch is done, it seems the impact is too big, and impacts too many libraries. Few examples: - rte_eth_macaddr_get/add/remove() use a (struct rte_ether_addr *) - many rte_flow structures use the rte_ prefixed net structures - the mac field of virtio_net structure is rte_ether_addr - the first arg of rte_thash_load_v6_addrs is (struct rte_ipv6_hdr *) ... Therefore, it is clear that doing this would break the compilation of many external applications. Another drawback we need to take in account: it will make the backport of patches more difficult, although this is something that could be tempered by a script. While it is obviously better to have a good namespace convention, we need to identify the issues we have today before deciding it's worth doing the change. Comments? >>> >>> Is there an consensus about the patchset? There was a decision on techboard >>> to >>> go with this change (adding rte_ prefix) [1]. >>> >>> >>> This is something that will affect DPDK consumers. Since many APIs are >>> changing >>> most probably will break API compatibility for many libraries. >>> >>> Meanwhile the conflict with the libc headers mentioned a few times in the >>> past, >>> this is something we need to fix. >>> >>> There are a few comments reluctant to this big modification, but what I >>> understand from Olivier's response both using BSD defines or having >>> compatibility headers in DPDK won't solve the problem completely. >>> >>> And assuming we will continue with this solution, another question is do we >>> still want to push in 19.02 scope? (And from process point of view I think a >>> deprecation notice is not merged for this change in 18.11 scope.) >>> >>> With the prediction of 19.05 will be big and already break API/ABI for some >>> libraries, can we push this into 19.05 as an early merge into repo? >>> >>> And I think this patch will affect LTS releases and will break auto >>> backporting >>> for many fixes because it touches many places, so pushing this change even >>> to >>> next LTS (19.11) can be an option. >>> >>> >>> Olivier, Thomas, >>> >>> What do you think about postponing this to 19.05 or even 19.11 ? >>> >>> >>> >>> [1] >>> https://mails.dpdk.org/archives/dev/2018-October/116695.html >>> Things that are missing in RFC: - test with FreeBSD - manually fix some indentation issues Olivier Matz (14): net: add rte prefix to arp structures net: add rte prefix to arp defines net: add rte prefix to ether structures net: add rte prefix to ether functions net: add rte prefix to ether defines net: add rte prefix to esp structure net: add rte prefix to gre structure net: add rte prefix to icmp structure net: add rte prefix to icmp defines net: add rte prefix to ip structure net: add rte prefix to ip defines net: add rte prefix to sctp structure net: add rte prefix to tcp structure net: add rte prefix to udp structure >>> >>> >> >> Sigh. Another case where DPDK has to reinvent something because >> we can't figure out how to do dependent libraries correctly. >> I would have rather just using the existing Linux, BSD definitions >> and fixing the DPDK code. >> >> It is probably the only viable compromise, but it adds to long >> term maintenance, and breaks DPDK applications. Neither of which >> is a good thing. >> >> Should this be done by marking the old structure deprecated >> first? Ideally, spread over three releases: first, tell the users >> in the documentation it is coming; second, mark the old structures >> as deprecated causing compiler warnings; third, remove the old >> definitions. Doing at once is introducing user pain for no gain. > > +1 With the current timeline, readiness of the patch and comments, at least it won't able to make th
Re: [dpdk-dev] [PATCH v4 6/9] examples/ipsec-secgw: make app to use ipsec library
On 12/14/2018 10:10 PM, Konstantin Ananyev wrote: > Changes to make ipsec-secgw to utilize librte_ipsec library. > That patch provides: > - changes in the related data structures. > - changes in the initialization code. > - new command-line parameters to enable librte_ipsec codepath > and related features. > > Note that right now by default current (non-librte_ipsec) code-path will > be used. User has to run application with new command-line option ('-l') > to enable new codepath. > The main reason for that: > - current librte_ipsec doesn't support all ipsec algorithms > and features that the app does. > - allow users to run both versions in parallel for some time > to figure out any functional or performance degradation with the > new code. > > It is planned to deprecate and remove non-librte_ipsec code path > in future releases. > > Signed-off-by: Mohammad Abdul Awal > Signed-off-by: Bernard Iremonger > Signed-off-by: Konstantin Ananyev > Acked-by: Radu Nicolau > --- > examples/ipsec-secgw/ipsec-secgw.c | 50 ++- > examples/ipsec-secgw/ipsec.h | 24 > examples/ipsec-secgw/meson.build | 2 +- > examples/ipsec-secgw/sa.c | 221 - > examples/ipsec-secgw/sp4.c | 25 > examples/ipsec-secgw/sp6.c | 25 > 6 files changed, 341 insertions(+), 6 deletions(-) > > diff --git a/examples/ipsec-secgw/ipsec-secgw.c > b/examples/ipsec-secgw/ipsec-secgw.c > index d1da2d5ce..48baa5001 100644 > --- a/examples/ipsec-secgw/ipsec-secgw.c > +++ b/examples/ipsec-secgw/ipsec-secgw.c > @@ -155,6 +155,9 @@ static uint32_t single_sa; > static uint32_t single_sa_idx; > static uint32_t frame_size; > > +/* application wide librte_ipsec/SA parameters */ > +struct app_sa_prm app_sa_prm = {.enable = 0}; > + > struct lcore_rx_queue { > uint16_t port_id; > uint8_t queue_id; > @@ -1063,6 +1066,10 @@ print_usage(const char *prgname) > " [-P]" > " [-u PORTMASK]" > " [-j FRAMESIZE]" > + " [-l]" > + " [-w REPLAY_WINDOW_SIZE]" > + " [-e]" > + " [-a]" > " -f CONFIG_FILE" > " --config (port,queue,lcore)[,(port,queue,lcore)]" > " [--single-sa SAIDX]" > @@ -1073,6 +1080,10 @@ print_usage(const char *prgname) > " -u PORTMASK: Hexadecimal bitmask of unprotected ports\n" > " -j FRAMESIZE: Enable jumbo frame with 'FRAMESIZE' as > maximum\n" > "packet size\n" > + " -l enables code-path that uses librte_ipsec\n" > + " -w REPLAY_WINDOW_SIZE specifies IPsec SQN replay window\n" > + " size for each SA\n" > + " -a enables SA SQN atomic behaviour\n" -e missing > " -f CONFIG_FILE: Configuration file\n" > " --config (port,queue,lcore): Rx queue configuration\n" > " --single-sa SAIDX: Use single SA index for outbound > traffic,\n" > @@ -1169,6 +1180,20 @@ parse_config(const char *q_arg) > return 0; > } > > +static void > +print_app_sa_prm(const struct app_sa_prm *prm) > +{ > + printf("librte_ipsec usage: %s\n", > + (prm->enable == 0) ? "disabled" : "enabled"); > + > + if (prm->enable == 0) > + return; > + > + printf("replay window size: %u\n", prm->window_size); > + printf("ESN: %s\n", (prm->enable_esn == 0) ? "disabled" : "enabled"); > + printf("SA flags: %#" PRIx64 "\n", prm->flags); > +} > + > static int32_t > parse_args(int32_t argc, char **argv) > { > @@ -1180,7 +1205,7 @@ parse_args(int32_t argc, char **argv) > > argvopt = argv; > > - while ((opt = getopt_long(argc, argvopt, "p:Pu:f:j:", > + while ((opt = getopt_long(argc, argvopt, "aelp:Pu:f:j:w:", > lgopts, &option_index)) != EOF) { > > switch (opt) { > @@ -1236,6 +1261,21 @@ parse_args(int32_t argc, char **argv) > } > printf("Enabled jumbo frames size %u\n", frame_size); > break; > + case 'l': > + app_sa_prm.enable = 1; > + break; > + case 'w': > + app_sa_prm.enable = 1; > + app_sa_prm.window_size = parse_decimal(optarg); > + break; > + case 'e': > + app_sa_prm.enable = 1; > + app_sa_prm.enable_esn = 1; > + break; > + case 'a': > + app_sa_prm.enable = 1; > + app_sa_prm.flags |= RTE_IPSEC_SAFLAG_SQN_ATOM; > + break; > case CMD_LINE_OPT_CONFIG_NUM: > ret = parse_config(optarg); > if (ret) { > @@ -1280,6 +1320,8 @@ parse_args(int32_t argc, char **argv) >
Re: [dpdk-dev] [PATCH v4 06/10] ipsec: implement SA data-path API
> -Original Message- > From: Akhil Goyal [mailto:akhil.go...@nxp.com] > Sent: Friday, December 21, 2018 2:51 PM > To: Ananyev, Konstantin ; dev@dpdk.org > Cc: Thomas Monjalon ; Awal, Mohammad Abdul > ; olivier.m...@6wind.com > Subject: Re: [dpdk-dev] [PATCH v4 06/10] ipsec: implement SA data-path API > > > > On 12/21/2018 7:57 PM, Ananyev, Konstantin wrote: > > + */ > > + > > +/* > > + * Move preceding (L3) headers down to remove ESP header and IV. > > + */ > why cant we use rte_mbuf APIs to append/prepend/trim/adjust lengths. > >>> We do use rte_mbuf append/trim, etc. adjust mbuf's data_ofs and data_len. > >>> But apart from that for transport mode we have to move actual packet > >>> headers. > >>> Let say for inbound we have to get rid of ESP header (which is after IP > >>> header), > >>> but preserve IP header, so we moving L2/L3 headers down, overwriting ESP > >>> header. > >> ok got your point > I believe these adjustments are happening in the mbuf itself. > Moreover these APIs are not specific to esp headers. > >>> I didn't get your last sentence: that function is used to remove esp > >>> header > >>> (see above) - that's why I named it that way. > >> These can be used to remove any header and not specifically esp. So this > >> API could be generic in rte_mbuf. > > That function has nothing to do with mbuf in general. > > It just copies bytes between overlapping in certain way buffers > > (src.start < dst.start < src.end < dst.end). > > Right now it is very primitive - copies on byte at a time in > > descending order. > > Wrote it just to avoid using memmove(). > > I don't think there is any point to have such dummy function in the lib/eal. > If this is better than memmove, then probably it is a candidate to a > function in lib. If it would be something really smart - I would try to push it into the EAL myself. But it is a dumb for() loop, nothing more. > I think Thomas/ Olivier can better comment on this > > > > +static inline void > > +remove_esph(char *np, char *op, uint32_t hlen) > > +{ > > + uint32_t i; > > + > > + for (i = hlen; i-- != 0; np[i] = op[i]) > > + ; > > +} > > + > > +/* > > + > > +/* update original and new ip header fields for tunnel case */ > > +static inline void > > +update_tun_l3hdr(const struct rte_ipsec_sa *sa, void *p, uint32_t plen, > > + uint32_t l2len, rte_be16_t pid) > > +{ > > + struct ipv4_hdr *v4h; > > + struct ipv6_hdr *v6h; > > + > > + if (sa->type & RTE_IPSEC_SATP_MODE_TUNLV4) { > > + v4h = p; > > + v4h->packet_id = pid; > > + v4h->total_length = rte_cpu_to_be_16(plen - l2len); > where are we updating the rest of the fields, like ttl, checksum, ip > addresses, etc > >>> TTL, ip addresses and other fileds supposed to be setuped by user > >>> and provided via rte_ipsec_sa_init(): > >>> struct rte_ipsec_sa_prm.tun.hdr should contain prepared template > >>> for L3(and L2 if user wants to) header. > >>> Checksum calculation is not done inside the lib right now - > >>> it is a user responsibility to caclucate/set it after librte_ipsec > >>> finishes processing the packet. > >> I believe static fields are updated during sa init but some fields like > >> ttl and checksum, > >> can be updated in the library itself which is updated for every packet. > >> (https://tools.ietf.org/html/rfc1624) > > About checksum - there is no point to calculate cksum it in the lib, > > as user may choose to use HW chksum offload. > > All other libraries ip_frag, GSO, etc. leave it to the user, > > I don't see why ipsec should be different here. > > About TTL and other fields - I suppose you refer to: > > https://tools.ietf.org/html/rfc4301#section-5.1.2 > > Header Construction for Tunnel Mode > > right? > > Surely that has to be supported, one way or the other, > > but we don't plan to implement it in 19.02. > > Current plan to add it in 19.05, if time permits. > I am not talking about the outer ip checksum. > Sorry the placement of the > comment was not quite right. But I do not see that happening. > My question is will the function ipip_outbound in ipsec-secgw called > from the application or will it be moved inside the library. > I believe this should be inside the lib. I think the same - we probably need to support all described in RFC header updates inside the process/prepare lib functions, or at least provide a separate function for the user to perform them. Though as I said above it is definitely not in 19.02 scope. > > > > + } else { > > + v6h = p; > > + v6h->payload_len = rte_cpu_to_be_16(plen - l2len - > > + sizeof(*v6h)); > > + } > > +} > > + > > +#endif /* _IPH_H_ */ > > diff --git a/lib/librte_ipsec/ipsec_sqn.h b/lib/l
Re: [dpdk-dev] [PATCHv2] pkg/dpdk.spec: add dkms to build kernel module
28/03/2017 22:46, Anders Roxell: > Build igb-uio and rte-kni kernel module using dkms. > > Signed-off-by: Anders Roxell The pkg/dpdk.spec file has been removed from the repository. This patch is now marked as "Not Applicable". If you think it was useful, please shout.
Re: [dpdk-dev] [PATCH] net/sfc: pass HW Tx queue index on creation
On 12/21/2018 12:15 PM, Andrew Rybchenko wrote: > Software indexes are PMD internal and should not be passed outside. > Right now SW and HW indexes of the Tx queue match, so it is just > a cosmetic fix. > > Fixes: dbdc82416b72 ("net/sfc: factor out libefx-based Tx datapath") > Cc: sta...@dpdk.org > > Signed-off-by: Andrew Rybchenko Applied to dpdk-next-net/master, thanks.
Re: [dpdk-dev] [PATCH v2 4/4] test/extmem: extend test to cover non-heap API
21/12/2018 12:29, Anatoly Burakov: > Currently, extmem autotest only covers the external malloc heap > API. Extend it to also cover the non-heap, register/unregister > external memory API. > > Signed-off-by: Anatoly Burakov Series applied, thanks
Re: [dpdk-dev] [PATCH v4 7/9] examples/ipsec-secgw: make data-path to use ipsec library
On 12/14/2018 10:10 PM, Konstantin Ananyev wrote: > diff --git a/examples/ipsec-secgw/sa.c b/examples/ipsec-secgw/sa.c > index 04eb8bce8..49da966a3 100644 > --- a/examples/ipsec-secgw/sa.c > +++ b/examples/ipsec-secgw/sa.c > @@ -1164,10 +1164,15 @@ int > inbound_sa_check(struct sa_ctx *sa_ctx, struct rte_mbuf *m, uint32_t sa_idx) > { > struct ipsec_mbuf_metadata *priv; > + struct ipsec_sa *sa; > > priv = get_priv(m); > + sa = priv->sa; > + if (sa != NULL) > + return (sa_ctx->sa[sa_idx].spi == sa->spi); > > - return (sa_ctx->sa[sa_idx].spi == priv->sa->spi); > + RTE_LOG(ERR, IPSEC, "SA not saved in private data\n"); > + return 0; > } I believe this is a fix and not related to this patchset. Should go as a fix and CC to stable.
[dpdk-dev] [RFC] virtio-user: ctrl vq support for packed
This adds support to virtio-user for control virtqueues and reverts commit "5e4e7a752 net/virtio-user: fail if cq used with packed vq". I add a struct virtio_user_queue to have a place for wrap counters and avail/used index (which is not needed for split ring because it has those in shared memory). This is a RFC because it only supports in-order use of descriptors in the ring. I'm looking for ideas how to change this without the .next field in the descriptor as we have it with split virtqueues. Signed-off-by: Jens Freimann --- drivers/net/virtio/virtio_ethdev.c| 11 +- .../net/virtio/virtio_user/virtio_user_dev.c | 101 -- .../net/virtio/virtio_user/virtio_user_dev.h | 14 ++- drivers/net/virtio/virtio_user_ethdev.c | 25 - 4 files changed, 135 insertions(+), 16 deletions(-) diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index 446c338fc..010ab6489 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -153,6 +153,7 @@ virtio_pq_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl, uint16_t flags; int sum = 0; int k; + int ndescs = 0; /* * Format is enforced in qemu code: @@ -162,7 +163,6 @@ virtio_pq_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl, */ head = vq->vq_avail_idx; wrap_counter = vq->avail_wrap_counter; - desc[head].flags = VRING_DESC_F_NEXT; desc[head].addr = cvq->virtio_net_hdr_mem; desc[head].len = sizeof(struct virtio_net_ctrl_hdr); vq->vq_free_cnt--; @@ -170,6 +170,7 @@ virtio_pq_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl, vq->vq_avail_idx -= vq->vq_nentries; vq->avail_wrap_counter ^= 1; } + ndescs++; for (k = 0; k < pkt_num; k++) { desc[vq->vq_avail_idx].addr = cvq->virtio_net_hdr_mem @@ -188,6 +189,7 @@ virtio_pq_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl, vq->vq_avail_idx -= vq->vq_nentries; vq->avail_wrap_counter ^= 1; } + ndescs++; } @@ -198,6 +200,7 @@ virtio_pq_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl, flags |= VRING_DESC_F_AVAIL(vq->avail_wrap_counter) | VRING_DESC_F_USED(!vq->avail_wrap_counter); desc[vq->vq_avail_idx].flags = flags; + flags = VRING_DESC_F_NEXT; flags |= VRING_DESC_F_AVAIL(wrap_counter) | VRING_DESC_F_USED(!wrap_counter); @@ -209,6 +212,7 @@ virtio_pq_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl, vq->vq_avail_idx -= vq->vq_nentries; vq->avail_wrap_counter ^= 1; } + ndescs++; virtqueue_notify(vq); @@ -220,8 +224,9 @@ virtio_pq_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl, /* now get used descriptors */ while (desc_is_used(&desc[vq->vq_used_cons_idx], vq)) { - vq->vq_free_cnt++; - if (++vq->vq_used_cons_idx >= vq->vq_nentries) { + vq->vq_free_cnt += ndescs; + vq->vq_used_cons_idx += ndescs; + if (vq->vq_used_cons_idx >= vq->vq_nentries) { vq->vq_used_cons_idx -= vq->vq_nentries; vq->used_wrap_counter ^= 1; } diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c index 5560bd9a3..b324d4391 100644 --- a/drivers/net/virtio/virtio_user/virtio_user_dev.c +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c @@ -43,15 +43,26 @@ virtio_user_kick_queue(struct virtio_user_dev *dev, uint32_t queue_sel) struct vhost_vring_file file; struct vhost_vring_state state; struct vring *vring = &dev->vrings[queue_sel]; + struct vring_packed *pq_vring = &dev->packed_vrings[queue_sel]; struct vhost_vring_addr addr = { .index = queue_sel, - .desc_user_addr = (uint64_t)(uintptr_t)vring->desc, - .avail_user_addr = (uint64_t)(uintptr_t)vring->avail, - .used_user_addr = (uint64_t)(uintptr_t)vring->used, .log_guest_addr = 0, .flags = 0, /* disable log */ }; + if (dev->features & (1ULL << VIRTIO_F_RING_PACKED)) { + addr.desc_user_addr = + (uint64_t)(uintptr_t)pq_vring->desc_packed; + addr.avail_user_addr = + (uint64_t)(uintptr_t)pq_vring->driver_event; + addr.used_user_addr = + (uint64_t)(uintptr_t)pq_vring->device_event; + } else { + addr.desc_user_addr = (uint64_t)(uintptr_t)vring->desc; + addr.avail_user_addr = (uint64_t)(uintptr_t)vring->avai
Re: [dpdk-dev] [PATCH] ethdev: ensure consistent port id assignment
On 9/4/2017 3:53 PM, sergio.gonzalez.monroy at intel.com (Sergio Gonzalez Monroy) wrote: > Hi, > > On 24/08/2016 23:17, amin.tootoonchian at intel.com (Tootoonchian, Amin) > wrote: >> Sergio, could you please review this patch? >> >> Thanks, >> Amin > > The patch status should be updated to 'Not Applicable' since similar > functionality has been implemented (commit d948f596). > Only Primary processes are allowed to call rte_eth_dev_allocate(), while > Secondary processes should call rte_eth_dev_attach_secondary() Agreed with comment, and even after that comment many thing changed in multi process support. Patch status updated as 'Not Applicable'. > > Thanks, > Sergio > >>> -Original Message- >>> From: Thomas Monjalon [mailto:thomas.monjalon at 6wind.com] >>> Sent: Wednesday, July 20, 2016 8:12 AM >>> To: Tootoonchian, Amin >>> Cc: dev at dpdk.org; Kerlin, MarcinX >>> Subject: Re: [dpdk-dev] [PATCH] ethdev: ensure consistent port id assignment >>> >>> Hi, >>> >>> 2016-07-20 15:07, Tootoonchian, Amin: Thomas, your thoughts? >>> I have 2 thoughts: >>> - it is too big for 16.07 >>> - it is related to multi-process mechanism, maintained by Sergio ;) >>> >>> Sorry I won't look at it shortly. > > >
Re: [dpdk-dev] [dpdk-stable] [PATCH] telemetry: fix error when using ports of different types
21/12/2018 14:27, Laatz, Kevin: > On 19/12/2018 11:59, Bruce Richardson wrote: > > Different NIC ports can have different numbers of xstats on them, which > > means that we can't just use the xstats list from the first port registered > > in the telemetry library. Instead, we need to check the type of each port - > > by checking its ops structure pointer - and register each port type once > > with the metrics lib. > > > > CC: sta...@dpdk.org > > Fixes: fdbdb3f9ce46 ("telemetry: add initial connection socket") > > > > Signed-off-by: Bruce Richardson > > Acked-by: Kevin Laatz Applied, thanks
Re: [dpdk-dev] [PATCH] cryptodev: fix PMD memory leak
> -Original Message- > From: Zhang, Roy Fan > Sent: Friday, December 21, 2018 2:11 PM > To: dev@dpdk.org > Cc: akhil.go...@nxp.com; De Lara Guarch, Pablo > ; Trahe, Fiona ; > sta...@dpdk.org > Subject: [PATCH] cryptodev: fix PMD memory leak > > This patch fixes the memory leak during queue pair release. > Originally the operation ring is not freed when releasing queue pair, cause > the next queue_pair configure call fail and memory leak. > > Fixes: eec136f3c54f ("aesni_gcm: add driver for AES-GCM crypto operations") > Fixes: cf7685d68f00 ("crypto/zuc: add driver for ZUC library") > Fixes: d61f70b4c918 ("crypto/libcrypto: add driver for OpenSSL library") > Fixes: 3aafc423cf4d ("snow3g: add driver for SNOW 3G library") > > Cc: sta...@dpdk.org > > Signed-off-by: Fan Zhang > --- > drivers/crypto/aesni_gcm/aesni_gcm_pmd_ops.c | 5 + > drivers/crypto/openssl/rte_openssl_pmd_ops.c | 5 + > drivers/crypto/snow3g/rte_snow3g_pmd_ops.c | 5 + > drivers/crypto/zuc/rte_zuc_pmd_ops.c | 5 + > 4 files changed, 20 insertions(+) Title should be renamed to "drivers/crypto:". Apart from that, Acked-by: Pablo de Lara
Re: [dpdk-dev] [PATCH v2] eal: don't fail secondary if primary is missing tailqs
On 10/5/2016 6:47 PM, jt at labs.hpe.com (Jean Tourrilhes) wrote: > If the primary and secondary process were build using different build > systems, the list of constructors included by the linker in each > binary might be different. Tailqs are registered via constructors, so > the linker magic will directly impact which tailqs are registered with > the primary and the secondary. > > DPDK currently assumes that the secondary has a subset of the tailqs > registered at the primary. In some build scenario, the secondary might > register a tailq that the primary did not register. In this case, > instead of exiting with a panic, just unregister the offending tailq > and allow the secondary to run. > > Signed-off-by: Jean Tourrilhes A lot changed in multiprocess support in last two years, updating status of this patch as 'Rejected', if the issue is still valid can you please either send a new version or report the issue in bugzilla? Thanks, ferruh
Re: [dpdk-dev] [RFC 2/2] doc: add deprecation marker usage
On 12/20/2018 8:02 AM, Luca Boccassi wrote: > On Wed, 2018-12-19 at 12:52 +, Ferruh Yigit wrote: >> Define '__rte_deprecated' usage process. >> >> Suggests keeping old API with '__rte_deprecated' marker until next >> LTS. >> >> Signed-off-by: Ferruh Yigit >> --- >> Cc: Luca Boccassi >> Cc: Kevin Traynor >> Cc: Yongseok Koh >> Cc: Neil Horman >> --- >> doc/guides/contributing/versioning.rst | 9 + >> 1 file changed, 9 insertions(+) >> >> diff --git a/doc/guides/contributing/versioning.rst >> b/doc/guides/contributing/versioning.rst >> index 19af56cd2..e0edd2e20 100644 >> --- a/doc/guides/contributing/versioning.rst >> +++ b/doc/guides/contributing/versioning.rst >> @@ -128,6 +128,15 @@ added to the Release Notes: >> these changes. Binaries using this library built prior to version >> 2.1 will >> require updating and recompilation. >> >> +New API replacing previous one >> +~~ >> + >> +If a new API proposed functionally replaces an existing one, when >> new API becomes >> +active old one marked with ``__rte_deprecated`` until next LTS. In >> next LTS API >> +removed completely. >> + > > Perhaps a bit of rephrasing, ie: > > "If a new API proposed functionally replaces an existing one, when the > new API becomes active then the old one is marked with > ``__rte_deprecated`` until the next LTS. In the next LTS, the API is > removed completely." Thanks for suggestion, I will send a new version with this update. > > Other than that: > > Acked-by: Luca Boccassi >
[dpdk-dev] [PATCH v2 1/2] doc: clean ABI/API policy guide
The original document written from the point of ABI versioning but later additions make document confusing, convert document into a ABI/API policy documentation and organize the document in subsections: - ABI/API Deprecation - Experimental APIs - Library versioning - ABI versioning Aim to clarify confusion between deprecation versioned ABI and overall ABI/API deprecation, also ABI versioning and Library versioning by organizing the sections. Signed-off-by: Ferruh Yigit --- Cc: Luca Boccassi Cc: Kevin Traynor Cc: Yongseok Koh Cc: Neil Horman --- doc/guides/contributing/versioning.rst | 132 + 1 file changed, 71 insertions(+), 61 deletions(-) diff --git a/doc/guides/contributing/versioning.rst b/doc/guides/contributing/versioning.rst index 01b36247e..19af56cd2 100644 --- a/doc/guides/contributing/versioning.rst +++ b/doc/guides/contributing/versioning.rst @@ -1,33 +1,31 @@ .. SPDX-License-Identifier: BSD-3-Clause Copyright 2018 The DPDK contributors -Managing ABI updates - +DPDK ABI/API policy +=== Description --- This document details some methods for handling ABI management in the DPDK. -Note this document is not exhaustive, in that C library versioning is flexible -allowing multiple methods to achieve various goals, but it will provide the user -with some introductory methods General Guidelines -- #. Whenever possible, ABI should be preserved -#. Libraries or APIs marked in ``experimental`` state may change without constraint. +#. ABI/API may be changed with a deprecation process +#. The modification of symbols can generally be managed with versioning +#. Libraries or APIs marked in ``experimental`` state may change without constraint #. New APIs will be marked as ``experimental`` for at least one release to allow any issues found by users of the new API to be fixed quickly #. The addition of symbols is generally not problematic -#. The modification of symbols can generally be managed with versioning #. The removal of symbols generally is an ABI break and requires bumping of the LIBABIVER macro #. Updates to the minimum hardware requirements, which drop support for hardware which was previously supported, should be treated as an ABI change. What is an ABI --- +~~ An ABI (Application Binary Interface) is the set of runtime interfaces exposed by a library. It is similar to an API (Application Programming Interface) but @@ -39,9 +37,13 @@ Therefore, in the case of dynamic linking, it is critical that an ABI is preserved, or (when modified), done in such a way that the application is unable to behave improperly or in an unexpected fashion. -The DPDK ABI policy + +ABI/API Deprecation --- +The DPDK ABI policy +~~~ + ABI versions are set at the time of major release labeling, and the ABI may change multiple times, without warning, between the last release label and the HEAD label of the git tree. @@ -99,8 +101,36 @@ readability purposes should be avoided. follow the relevant deprecation policy procedures as above: 3 acks and announcement at least one release in advance. +Examples of Deprecation Notices +~~~ + +The following are some examples of ABI deprecation notices which would be +added to the Release Notes: + +* The Macro ``#RTE_FOO`` is deprecated and will be removed with version 2.0, + to be replaced with the inline function ``rte_foo()``. + +* The function ``rte_mbuf_grok()`` has been updated to include a new parameter + in version 2.0. Backwards compatibility will be maintained for this function + until the release of version 2.1 + +* The members of ``struct rte_foo`` have been reorganized in release 2.0 for + performance reasons. Existing binary applications will have backwards + compatibility in release 2.0, while newly built binaries will need to + reference the new structure variant ``struct rte_foo2``. Compatibility will + be removed in release 2.2, and all applications will require updating and + rebuilding to the new structure at that time, which will be renamed to the + original ``struct rte_foo``. + +* Significant ABI changes are planned for the ``librte_dostuff`` library. The + upcoming release 2.0 will not contain these changes, but release 2.1 will, + and no backwards compatibility is planned due to the extensive nature of + these changes. Binaries using this library built prior to version 2.1 will + require updating and recompilation. + + Experimental APIs -~ +- APIs marked as ``experimental`` are not considered part of the ABI and may change without warning at any time. Since changes to APIs are most likely @@ -130,35 +160,38 @@ is not required. Though, an API should remain in experimental state for at least one release. Thereafter, normal process of posting patch for review to mailing list
[dpdk-dev] [PATCH v2 2/2] doc: add deprecation marker usage
Define '__rte_deprecated' usage process. Suggests keeping old API with '__rte_deprecated' marker until next LTS. Signed-off-by: Ferruh Yigit Acked-by: Luca Boccassi --- Cc: Luca Boccassi Cc: Kevin Traynor Cc: Yongseok Koh Cc: Neil Horman v2: * Rephrased as commented --- doc/guides/contributing/versioning.rst | 10 ++ 1 file changed, 10 insertions(+) diff --git a/doc/guides/contributing/versioning.rst b/doc/guides/contributing/versioning.rst index 19af56cd2..360238985 100644 --- a/doc/guides/contributing/versioning.rst +++ b/doc/guides/contributing/versioning.rst @@ -128,6 +128,16 @@ added to the Release Notes: these changes. Binaries using this library built prior to version 2.1 will require updating and recompilation. +New API replacing previous one +~~ + +If a new API proposed functionally replaces an existing one, when the +new API becomes active then the old one is marked with +``__rte_deprecated`` until the next LTS. In the next LTS, the API is +removed completely. + +Reminder that new API should follow deprecation process to become active. + Experimental APIs - -- 2.17.2
Re: [dpdk-dev] [PATCH v4 1/9] examples/ipsec-secgw: avoid to request unused TX offloads
> -Original Message- > From: Akhil Goyal [mailto:akhil.go...@nxp.com] > Sent: Friday, December 21, 2018 1:57 PM > To: Ananyev, Konstantin ; dev@dpdk.org > Cc: Nicolau, Radu ; Horton, Remy > > Subject: Re: [dpdk-dev] [PATCH v4 1/9] examples/ipsec-secgw: avoid to request > unused TX offloads > > Hi Konstantin, > > On 12/14/2018 10:10 PM, Konstantin Ananyev wrote: > > ipsec-secgw always enables TX offloads > > (DEV_TX_OFFLOAD_MULTI_SEGS, DEV_TX_OFFLOAD_SECURITY), > > even when they are not requested by the config. > > That causes many PMD to choose full-featured TX function, > > which in many cases is much slower then one without offloads. > > That patch adds checks to enabled extra HW offloads, only when > > they were requested. > > Plus it enables DEV_TX_OFFLOAD_IPV4_CKSUM, > > only when other HW TX ofloads are going to be enabled. > > Otherwise SW version of ip cksum calculation is used. > > That allows to use vector TX function, when inline-ipsec is not > > requested. > > > > Signed-off-by: Remy Horton > > Signed-off-by: Konstantin Ananyev > > Acked-by: Radu Nicolau > > --- > > examples/ipsec-secgw/ipsec-secgw.c | 44 +++ > > examples/ipsec-secgw/ipsec.h | 6 > > examples/ipsec-secgw/sa.c | 56 ++ > > 3 files changed, 91 insertions(+), 15 deletions(-) > > > > diff --git a/examples/ipsec-secgw/ipsec-secgw.c > > b/examples/ipsec-secgw/ipsec-secgw.c > > index 1bc0b5b50..cfc2b05e5 100644 > > --- a/examples/ipsec-secgw/ipsec-secgw.c > > +++ b/examples/ipsec-secgw/ipsec-secgw.c > > @@ -208,8 +208,6 @@ static struct rte_eth_conf port_conf = { > > }, > > .txmode = { > > .mq_mode = ETH_MQ_TX_NONE, > > - .offloads = (DEV_TX_OFFLOAD_IPV4_CKSUM | > > -DEV_TX_OFFLOAD_MULTI_SEGS), > I believe this is disabling checksum offload for all cases and then > enabling only for inline crypto and inline proto. Yes. > This is breaking lookaside proto and lookaside none cases. Please > correct me if I am wrong. Why breaking? For cases when HW cksum offload is disabled, IPv4 cksum calculation will be done in SW, see below: prepare_tx_pkt(...) { ... + + /* calculate IPv4 cksum in SW */ + if ((pkt->ol_flags & PKT_TX_IP_CKSUM) == 0) + ip->ip_sum = rte_ipv4_cksum((struct ipv4_hdr *)ip); We tested lookaside-none case quite extensively - all works well, in fact on Intel NICs it became even a bit faster because of that change (though not much). Disabling HW offloads when they are not really required has 2 benefits: 1) allows app to be run on NICs without HW offloads support. 2) allows dev_configure() for TX path to select simple/vector TX functions which for many NICs are significantly faster. Konstantin > So a NACK for this if my understanding is correct. > > }, > > }; > > > > @@ -315,7 +313,8 @@ prepare_traffic(struct rte_mbuf **pkts, struct > > ipsec_traffic *t, > > } > > > > static inline void > > -prepare_tx_pkt(struct rte_mbuf *pkt, uint16_t port) > > +prepare_tx_pkt(struct rte_mbuf *pkt, uint16_t port, > > + const struct lcore_conf *qconf) > > { > > struct ip *ip; > > struct ether_hdr *ethhdr; > > @@ -325,14 +324,19 @@ prepare_tx_pkt(struct rte_mbuf *pkt, uint16_t port) > > ethhdr = (struct ether_hdr *)rte_pktmbuf_prepend(pkt, ETHER_HDR_LEN); > > > > if (ip->ip_v == IPVERSION) { > > - pkt->ol_flags |= PKT_TX_IP_CKSUM | PKT_TX_IPV4; > > + pkt->ol_flags |= qconf->outbound.ipv4_offloads; > > pkt->l3_len = sizeof(struct ip); > > pkt->l2_len = ETHER_HDR_LEN; > > > > ip->ip_sum = 0; > > + > > + /* calculate IPv4 cksum in SW */ > > + if ((pkt->ol_flags & PKT_TX_IP_CKSUM) == 0) > > + ip->ip_sum = rte_ipv4_cksum((struct ipv4_hdr *)ip); > > + > > ethhdr->ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4); > > } else { > > - pkt->ol_flags |= PKT_TX_IPV6; > > + pkt->ol_flags |= qconf->outbound.ipv6_offloads; > > pkt->l3_len = sizeof(struct ip6_hdr); > > pkt->l2_len = ETHER_HDR_LEN; > > > > @@ -346,18 +350,19 @@ prepare_tx_pkt(struct rte_mbuf *pkt, uint16_t port) > > } > > > > static inline void > > -prepare_tx_burst(struct rte_mbuf *pkts[], uint16_t nb_pkts, uint16_t port) > > +prepare_tx_burst(struct rte_mbuf *pkts[], uint16_t nb_pkts, uint16_t port, > > + const struct lcore_conf *qconf) > > { > > int32_t i; > > const int32_t prefetch_offset = 2; > > > > for (i = 0; i < (nb_pkts - prefetch_offset); i++) { > > rte_mbuf_prefetch_part2(pkts[i + prefetch_offset]); > > - prepare_tx_pkt(pkts[i], port); > > + prepare_tx_pkt(pkts[i], port, qconf); > > } > > /* Process left packets */ > > for (; i < nb_pkts; i++) > > - prepare_tx_pkt(pkts[i], port); > > + prep
[dpdk-dev] [PATCH] kni: fix build on RHEL8 and RHEL7.6-ALT
Signed-off-by: David Zeng --- kernel/linux/kni/compat.h | 9 - kernel/linux/kni/ethtool/igb/kcompat.h | 4 +++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/kernel/linux/kni/compat.h b/kernel/linux/kni/compat.h index 5aadebb..3c575c7 100644 --- a/kernel/linux/kni/compat.h +++ b/kernel/linux/kni/compat.h @@ -102,8 +102,15 @@ #undef NET_NAME_UNKNOWN #endif +/* + * RHEL has two different version with different kernel version: + * 3.10 is for AMD, Intel, IBM POWER7 and POWER8; + * 4.14 is for ARM and IBM POWER9 + */ #if (defined(RHEL_RELEASE_CODE) && \ - (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 5))) + (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 5)) && \ + (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(8, 0)) && \ + (LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0))) #define ndo_change_mtu ndo_change_mtu_rh74 #endif diff --git a/kernel/linux/kni/ethtool/igb/kcompat.h b/kernel/linux/kni/ethtool/igb/kcompat.h index ae1b530..3c0c8e3 100644 --- a/kernel/linux/kni/ethtool/igb/kcompat.h +++ b/kernel/linux/kni/ethtool/igb/kcompat.h @@ -3930,7 +3930,9 @@ static inline struct sk_buff *__kc__vlan_hwaccel_put_tag(struct sk_buff *skb, #endif #if (defined(RHEL_RELEASE_CODE) && \ - (RHEL_RELEASE_VERSION(7, 5) <= RHEL_RELEASE_CODE)) + (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 5)) && \ +(RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(8, 0)) && \ +(LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0))) #define ndo_change_mtu ndo_change_mtu_rh74 #endif -- 1.8.3.1
[dpdk-dev] [PATCH] kni: fix build on RHEL8 and RHEL7.6-ALT
Thanks, Ferruh!
Re: [dpdk-dev] [PATCH] eal: simplify RTE_PMD_DEBUG_TRACE
On 12/14/2018 9:20 PM, Jeff Shaw wrote: > On Fri, Dec 14, 2018 at 12:50:55PM -0800, Stephen Hemminger wrote: >> Use rte_log directly, eliminating no longer used rte_pmd_dev_trace >> function. This removes variable length array which is problem on >> Windows and other compilers not doing C99. >> >> Also, drop unused RTE_PROC_PRIMARY macros. >> >> Reported-by: Jeff Shaw >> Signed-off-by: Stephen Hemminger >> --- >> lib/librte_eal/common/include/rte_dev.h | 43 ++--- >> 1 file changed, 3 insertions(+), 40 deletions(-) >> >> diff --git a/lib/librte_eal/common/include/rte_dev.h >> b/lib/librte_eal/common/include/rte_dev.h >> index a9724dc9181c..e496da440028 100644 >> --- a/lib/librte_eal/common/include/rte_dev.h >> +++ b/lib/librte_eal/common/include/rte_dev.h >> @@ -43,54 +43,17 @@ typedef void (*rte_dev_event_cb_fn)(const char >> *device_name, >> enum rte_dev_event_type event, >> void *cb_arg); >> >> -__attribute__((format(printf, 2, 0))) >> -static inline void >> -rte_pmd_debug_trace(const char *func_name, const char *fmt, ...) >> -{ >> -va_list ap; >> - >> -va_start(ap, fmt); >> - >> -{ >> -char buffer[vsnprintf(NULL, 0, fmt, ap) + 1]; >> - >> -va_end(ap); >> - >> -va_start(ap, fmt); >> -vsnprintf(buffer, sizeof(buffer), fmt, ap); >> -va_end(ap); >> - >> -rte_log(RTE_LOG_ERR, RTE_LOGTYPE_PMD, "%s: %s", >> -func_name, buffer); >> -} >> -} >> - > > Will this break applications that try to use this function? Because it is not > a documented function, is there no guarantee it will be present? > >> /* >> * Enable RTE_PMD_DEBUG_TRACE() when at least one component relying on the >> * RTE_*_RET() macros defined below is compiled in debug mode. >> */ >> #if defined(RTE_LIBRTE_EVENTDEV_DEBUG) >> -#define RTE_PMD_DEBUG_TRACE(...) \ >> -rte_pmd_debug_trace(__func__, __VA_ARGS__) >> +#define RTE_PMD_DEBUG_TRACE(fmt, args...) \ >> +rte_log(RTE_LOG_ERR, RTE_LOGTYPE_PMD, "%s():" fmt, __func__, ## args) > > Actually, MSVC does not support named variable arguments either. I think this > will work instead: > > #define RTE_PMD_DEBUG_TRACE(fmt, ...) \ > rte_log(RTE_LOG_ERR, RTE_LOGTYPE_PMD, "%s():" fmt, __func__, > __VA_ARGS__) > > The previous behavior was "%s: ..." not "%s():". I'm not sure if you meant to > change how the messages are displayed. I don't care either way, but maybe > users of the function would prefer the same format. > +1 to remove "rte_pmd_debug_trace()" option, but I guess a new version is required, to switch to '__VA_ARGS__' and perhaps to keep the format same, %s vs %s(). Who can send a new version? Jeff?
Re: [dpdk-dev] [PATCH v3 1/2] lib/librte_meter: add RFC4115 trTCM meter support
Hi Eelco, > > +/** trTCM parameters per metered traffic flow. The CIR, EIT, CBS and EBS Small typo here: EIT to be replaced by EIR. > +parameters only count bytes of IP packets and do not include link specific > +headers. The CBS and EBS need to be greater than zero if CIR and EIR are > +none-zero respectively.*/ > +struct rte_meter_trtcm_rfc4115_params { > + uint64_t cir; /**< Committed Information Rate (CIR). Measured in > bytes per second. */ > + uint64_t eir; /**< Excess Information Rate (EIR). Measured in bytes > per second. */ > + uint64_t cbs; /**< Committed Burst Size (CBS). Measured in bytes. > */ > + uint64_t ebs; /**< Excess Burst Size (EBS). Measured in bytes. */ > +}; > + > +static inline enum rte_meter_color __rte_experimental > +rte_meter_trtcm_rfc4115_color_blind_check( > + struct rte_meter_trtcm_rfc4115 *m, > + struct rte_meter_trtcm_rfc4115_profile *p, > + uint64_t time, > + uint32_t pkt_len) > +{ > + uint64_t time_diff_tc, time_diff_te, n_periods_tc, n_periods_te, tc, > te; > + > + /* Bucket update */ > + time_diff_tc = time - m->time_tc; > + time_diff_te = time - m->time_te; > + n_periods_tc = time_diff_tc / p->cir_period; > + n_periods_te = time_diff_te / p->eir_period; > + m->time_tc += n_periods_tc * p->cir_period; > + m->time_te += n_periods_te * p->eir_period; > + > + tc = m->tc + n_periods_tc * p->cir_bytes_per_period; > + if (tc > p->cbs) > + tc = p->cbs; > + > + te = m->te + n_periods_te * p->eir_bytes_per_period; > + if (te > p->ebs) > + te = p->ebs; > + > + /* Color logic */ > + if (tc >= pkt_len) { > + m->tc = tc - pkt_len; > + m->te = te; > + return e_RTE_METER_GREEN; > + } else if (te >= pkt_len) { > + m->tc = tc; > + m->te = te - pkt_len; > + return e_RTE_METER_YELLOW; > + } > + > + /* If we end up here the color is RED */ > + m->tc = tc; > + m->te = te; > + return e_RTE_METER_RED; > +} > + Since the branch (tc >= pkt_len) == TRUE always returns, I suggest we remove the following "else", as it is redundant: /* Color logic */ if (tc >= pkt_len) { m->tc = tc - pkt_len; m->te = te; return e_RTE_METER_GREEN; } if (te >= pkt_len) { m->tc = tc; m->te = te - pkt_len; return e_RTE_METER_YELLOW; } /* If we end up here the color is RED */ m->tc = tc; m->te = te; return e_RTE_METER_RED; > +static inline enum rte_meter_color __rte_experimental > +rte_meter_trtcm_rfc4115_color_aware_check( > + struct rte_meter_trtcm_rfc4115 *m, > + struct rte_meter_trtcm_rfc4115_profile *p, > + uint64_t time, > + uint32_t pkt_len, > + enum rte_meter_color pkt_color) > +{ > + uint64_t time_diff_tc, time_diff_te, n_periods_tc, n_periods_te, tc, > te; > + > + /* Bucket update */ > + time_diff_tc = time - m->time_tc; > + time_diff_te = time - m->time_te; > + n_periods_tc = time_diff_tc / p->cir_period; > + n_periods_te = time_diff_te / p->eir_period; > + m->time_tc += n_periods_tc * p->cir_period; > + m->time_te += n_periods_te * p->eir_period; > + > + tc = m->tc + n_periods_tc * p->cir_bytes_per_period; > + if (tc > p->cbs) > + tc = p->cbs; > + > + te = m->te + n_periods_te * p->eir_bytes_per_period; > + if (te > p->ebs) > + te = p->ebs; > + > + /* Color logic */ > + if (pkt_color == e_RTE_METER_GREEN) { > + if (tc >= pkt_len) { > + m->tc = tc - pkt_len; > + m->te = te; > + return e_RTE_METER_GREEN; > + } else if (te >= pkt_len) { > + m->tc = tc; > + m->te = te - pkt_len; > + return e_RTE_METER_YELLOW; > + } > + } else if (pkt_color == e_RTE_METER_YELLOW && te >= pkt_len) { > + m->tc = tc; > + m->te = te - pkt_len; > + return e_RTE_METER_YELLOW; > + } > + > + /* If we end up here the color is RED */ > + m->tc = tc; > + m->te = te; > + return e_RTE_METER_RED; > +} > + > + I suggest we follow the logic from the diagram in the RFC rather than the logic in the text preceding the diagram. Although the two descriptions are equivalent (after a bit of thinking), the diagram seems more optimal to me: /* Color logic */ if ((pkt_color == e_RTE_METER_GREEN) && (tc >= pkt_len)) { m->tc = tc - pkt_len; m->te = te; return e_RTE_METER_GREEN; } if ((pkt_color != e_RTE_METER_RED) && (te >= pkt_len)) { m->tc = tc; m->te = te - pkt_len; return e_RTE_METER_YELLOW; } /* If we end up he
[dpdk-dev] [pull-request] next-qos 19.02 PRE-RC1
malloc: add option --match-allocations (2018-12-20 13:01:08 +0100) are available in the Git repository at: http://dpdk.org/git/next/dpdk-next-qos for you to fetch changes up to c7286e56de95b7a3ef2ce4fe91b4ea6422be8481: app/test-pmd: expand RED queue thresholds to 64 bits (2018-12-20 19:03:47 +) Reshma Pattan (2): meter: unify packet color definition mbuf: implement generic format for sched field Rosen Xu (1): app/test-pmd: expand RED queue thresholds to 64 bits Tonghao Zhang (1): sched: fix memory leak on init app/test-pmd/cmdline_tm.c | 24 ++--- doc/guides/rel_notes/deprecation.rst | 10 --- doc/guides/rel_notes/release_19_02.rst | 4 +- examples/qos_sched/app_thread.c| 7 +- examples/qos_sched/main.c | 1 + lib/Makefile | 1 + lib/librte_ethdev/Makefile | 2 +- lib/librte_ethdev/meson.build | 2 +- lib/librte_ethdev/rte_mtr.h| 16 ++-- lib/librte_ethdev/rte_tm.h | 16 ++-- lib/librte_eventdev/rte_event_eth_tx_adapter.h | 18 ++-- lib/librte_mbuf/Makefile | 2 +- lib/librte_mbuf/meson.build| 2 +- lib/librte_mbuf/rte_mbuf.h | 119 - lib/librte_meter/rte_meter.h | 21 +++-- lib/librte_pipeline/rte_table_action.c | 44 +++-- lib/librte_sched/Makefile | 2 +- lib/librte_sched/meson.build | 1 + lib/librte_sched/rte_sched.c | 92 --- lib/librte_sched/rte_sched.h | 10 ++- lib/meson.build| 4 +- test/test/test_sched.c | 9 +- 22 files changed, 245 insertions(+), 162 deletions(-)
Re: [dpdk-dev] [PATCH] kni: fix build on RHEL8 and RHEL7.6-ALT
On 12/21/2018 4:15 PM, David Zeng wrote: > Signed-off-by: David Zeng Acked-by: Ferruh Yigit
[dpdk-dev] kni: fix build on RHEL8 and RHEL7.6-ALT
Fix the coding style.
[dpdk-dev] [PATCH] kni: fix build on RHEL8 and RHEL7.6-ALT
Signed-off-by: David Zeng --- kernel/linux/kni/compat.h | 9 - kernel/linux/kni/ethtool/igb/kcompat.h | 4 +++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/kernel/linux/kni/compat.h b/kernel/linux/kni/compat.h index 5aadebb..3c575c7 100644 --- a/kernel/linux/kni/compat.h +++ b/kernel/linux/kni/compat.h @@ -102,8 +102,15 @@ #undef NET_NAME_UNKNOWN #endif +/* + * RHEL has two different version with different kernel version: + * 3.10 is for AMD, Intel, IBM POWER7 and POWER8; + * 4.14 is for ARM and IBM POWER9 + */ #if (defined(RHEL_RELEASE_CODE) && \ - (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 5))) + (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 5)) && \ + (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(8, 0)) && \ + (LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0))) #define ndo_change_mtu ndo_change_mtu_rh74 #endif diff --git a/kernel/linux/kni/ethtool/igb/kcompat.h b/kernel/linux/kni/ethtool/igb/kcompat.h index ae1b530..430aaba 100644 --- a/kernel/linux/kni/ethtool/igb/kcompat.h +++ b/kernel/linux/kni/ethtool/igb/kcompat.h @@ -3930,7 +3930,9 @@ static inline struct sk_buff *__kc__vlan_hwaccel_put_tag(struct sk_buff *skb, #endif #if (defined(RHEL_RELEASE_CODE) && \ - (RHEL_RELEASE_VERSION(7, 5) <= RHEL_RELEASE_CODE)) + (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 5)) && \ + (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(8, 0)) && \ + (LINUX_VERSION_CODE < KERNEL_VERSION(4, 14, 0))) #define ndo_change_mtu ndo_change_mtu_rh74 #endif -- 1.8.3.1
Re: [dpdk-dev] [PATCH] kni: fix build on RHEL8 and RHEL7.6-ALT
On 12/21/2018 4:27 PM, David Zeng wrote: > Signed-off-by: David Zeng Acked-by: Ferruh Yigit
Re: [dpdk-dev] [PATCH] compress/qat: add compression on dh895x
> -Original Message- > From: Jozwiak, TomaszX > Sent: Friday, December 21, 2018 2:15 AM > To: dev@dpdk.org; Trahe, Fiona ; Jozwiak, TomaszX > ; akhil.go...@nxp.com > Subject: [PATCH] compress/qat: add compression on dh895x > > This patch enables compression on dh895x HW series > and updates supported hardware accelerator devices list. > > Signed-off-by: Tomasz Jozwiak Acked-by: Fiona Trahe
Re: [dpdk-dev] [PATCH] test/crypto: don't attempt unsupported SGL tests on aesni mb PMD
Hi Pablo, Thanks for reviewing > -Original Message- > From: De Lara Guarch, Pablo > Sent: Friday, December 21, 2018 4:52 AM > To: Trahe, Fiona ; dev@dpdk.org > Cc: Zhang, Roy Fan ; akhil.go...@nxp.com > Subject: RE: [PATCH] test/crypto: don't attempt unsupported SGL tests on > aesni mb PMD > > > > > -Original Message- > > From: Trahe, Fiona > > Sent: Friday, December 21, 2018 12:02 AM > > To: dev@dpdk.org > > Cc: De Lara Guarch, Pablo ; Trahe, Fiona > > ; Zhang, Roy Fan ; > > akhil.go...@nxp.com > > Subject: [PATCH] test/crypto: don't attempt unsupported SGL tests on aesni > > mb PMD > > > > Remove AESNI_MB flag from SGL test cases which it doesn't support. > > > > Signed-off-by: Fiona Trahe > > Patch looks good, but I wonder if this is a fix and therefore, if it needs to > be backported. [Fiona] It did previously print a message saying the device doesn't support the test case, so I think more of a style change than a bug. I think this way is clearer but not necessary to backport cause only really of interest to developers and dev of new test cases are usually done on latest. > > Thanks, > Pablo
Re: [dpdk-dev] net/failsafe: add default Tx mbuf fast free capability
On Fri, 21 Dec 2018 14:16:46 +0100 Gaëtan Rivet wrote: > If a VF is forbidden by its PF from adding MAC addresses, the driver > should still advertize support for "Unicast MAC filter" right? Not unless both VF and other path support multiple MAC addresses. Since Hyper-V/Azure is limited to a single MAC address by the vswitch it would not be appropriate there.
[dpdk-dev] [PATCH v2] eal: simplify RTE_PMD_DEBUG_TRACE
From: Stephen Hemminger Use rte_log directly, eliminating no longer used rte_pmd_dev_trace function. This removes variable length array which is problem on Windows and other compilers not doing C99. Also, drop unused RTE_PROC_PRIMARY macros. Signed-off-by: Stephen Hemminger Signed-off-by: Jeff Shaw --- V2: - Changed named variable "args..." to use "..." with ##__VA_LIST__. Pasting is necessary to support case where only the format string, with no arguments, is passed. --- lib/librte_eal/common/include/rte_dev.h | 44 +++-- 1 file changed, 4 insertions(+), 40 deletions(-) diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h index a9724dc91..366beb7c0 100644 --- a/lib/librte_eal/common/include/rte_dev.h +++ b/lib/librte_eal/common/include/rte_dev.h @@ -43,54 +43,18 @@ typedef void (*rte_dev_event_cb_fn)(const char *device_name, enum rte_dev_event_type event, void *cb_arg); -__attribute__((format(printf, 2, 0))) -static inline void -rte_pmd_debug_trace(const char *func_name, const char *fmt, ...) -{ - va_list ap; - - va_start(ap, fmt); - - { - char buffer[vsnprintf(NULL, 0, fmt, ap) + 1]; - - va_end(ap); - - va_start(ap, fmt); - vsnprintf(buffer, sizeof(buffer), fmt, ap); - va_end(ap); - - rte_log(RTE_LOG_ERR, RTE_LOGTYPE_PMD, "%s: %s", - func_name, buffer); - } -} - /* * Enable RTE_PMD_DEBUG_TRACE() when at least one component relying on the * RTE_*_RET() macros defined below is compiled in debug mode. */ #if defined(RTE_LIBRTE_EVENTDEV_DEBUG) -#define RTE_PMD_DEBUG_TRACE(...) \ - rte_pmd_debug_trace(__func__, __VA_ARGS__) +#define RTE_PMD_DEBUG_TRACE(fmt, ...) \ + rte_log(RTE_LOG_ERR, RTE_LOGTYPE_PMD, "%s: " fmt, __func__, \ + ##__VA_ARGS__) #else -#define RTE_PMD_DEBUG_TRACE(...) (void)0 +#define RTE_PMD_DEBUG_TRACE(...) do { } while(0) #endif -/* Macros for checking for restricting functions to primary instance only */ -#define RTE_PROC_PRIMARY_OR_ERR_RET(retval) do { \ - if (rte_eal_process_type() != RTE_PROC_PRIMARY) { \ - RTE_PMD_DEBUG_TRACE("Cannot run in secondary processes\n"); \ - return retval; \ - } \ -} while (0) - -#define RTE_PROC_PRIMARY_OR_RET() do { \ - if (rte_eal_process_type() != RTE_PROC_PRIMARY) { \ - RTE_PMD_DEBUG_TRACE("Cannot run in secondary processes\n"); \ - return; \ - } \ -} while (0) - /* Macros to check for invalid function pointers */ #define RTE_FUNC_PTR_OR_ERR_RET(func, retval) do { \ if ((func) == NULL) { \ -- 2.14.3