[dpdk-dev] [PATCH v3 0/2] support IOMMU for DMA device
This series enables DMA devices to use vfio in async vhost. The first patch extends the current vfio DMA mapping API so that one of two adjacent mappings can be unmapped even when the platform does not support partial unmapping. The second patch adds the IOMMU programming for guest memory in async vhost. v3: * Move the async_map_status flag to the virtio_net structure to avoid breaking the ABI. v2: * Add rte_errno filtering for devices bound to a kernel driver. * Add a flag to check the status of region mapping. * Fix one typo. Xuan Ding (2): vfio: allow partially unmapping adjacent memory vhost: enable IOMMU for async vhost lib/eal/linux/eal_vfio.c | 338 ++- lib/vhost/vhost.h | 4 + lib/vhost/vhost_user.c | 112 - 3 files changed, 342 insertions(+), 112 deletions(-) -- 2.17.1
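For reference, below is a minimal, hedged sketch of the scenario the first patch addresses, using the public container-mapping API: two adjacent regions are mapped by separate calls and the first one is later unmapped on its own. The helper name, addresses and sizes are assumptions made up for the illustration, not taken from the series.

    #include <stdint.h>
    #include <rte_vfio.h>

    /* Illustration only: map two adjacent regions, then unmap just the first. */
    static int
    map_adjacent_then_unmap_first(uint64_t va_a, uint64_t iova_a,
                                  uint64_t va_b, uint64_t iova_b, uint64_t len)
    {
        int ret;

        /* Map region A, then region B which happens to start at va_a + len. */
        ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
                                         va_a, iova_a, len);
        if (ret < 0)
            return ret;
        ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
                                         va_b, iova_b, len);
        if (ret < 0)
            return ret;

        /*
         * Unmap only region A. Without the chunk-based tracking added by the
         * first patch, A and B could have been merged into a single segment,
         * and IOMMUs without partial-unmap support would refuse this call.
         */
        return rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
                                            va_a, iova_a, len);
    }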
[dpdk-dev] [PATCH v3 1/2] vfio: allow partially unmapping adjacent memory
Currently, if we map a memory area A, then map a separate memory area B that by coincidence happens to be adjacent to A, current implementation will merge these two segments into one, and if partial unmapping is not supported, these segments will then be only allowed to be unmapped in one go. In other words, given segments A and B that are adjacent, it is currently not possible to map A, then map B, then unmap A. Fix this by adding a notion of "chunk size", which will allow subdividing segments into equally sized segments whenever we are dealing with an IOMMU that does not support partial unmapping. With this change, we will still be able to merge adjacent segments, but only if they are of the same size. If we keep with our above example, adjacent segments A and B will be stored as separate segments if they are of different sizes. Signed-off-by: Anatoly Burakov Signed-off-by: Xuan Ding --- lib/eal/linux/eal_vfio.c | 338 ++- 1 file changed, 228 insertions(+), 110 deletions(-) diff --git a/lib/eal/linux/eal_vfio.c b/lib/eal/linux/eal_vfio.c index 25add2fa5d..657c89ca58 100644 --- a/lib/eal/linux/eal_vfio.c +++ b/lib/eal/linux/eal_vfio.c @@ -31,9 +31,10 @@ */ #define VFIO_MAX_USER_MEM_MAPS 256 struct user_mem_map { - uint64_t addr; - uint64_t iova; - uint64_t len; + uint64_t addr; /**< start VA */ + uint64_t iova; /**< start IOVA */ + uint64_t len; /**< total length of the mapping */ + uint64_t chunk; /**< this mapping can be split in chunks of this size */ }; struct user_mem_maps { @@ -95,7 +96,8 @@ static const struct vfio_iommu_type iommu_types[] = { static int is_null_map(const struct user_mem_map *map) { - return map->addr == 0 && map->iova == 0 && map->len == 0; + return map->addr == 0 && map->iova == 0 && + map->len == 0 && map->chunk == 0; } /* we may need to merge user mem maps together in case of user mapping/unmapping @@ -129,41 +131,90 @@ user_mem_map_cmp(const void *a, const void *b) if (umm_a->len > umm_b->len) return 1; + if (umm_a->chunk < umm_b->chunk) + return -1; + if (umm_a->chunk > umm_b->chunk) + return 1; + return 0; } -/* adjust user map entry. this may result in shortening of existing map, or in - * splitting existing map in two pieces. +/* + * Take in an address range and list of current mappings, and produce a list of + * mappings that will be kept. */ +static int +process_maps(struct user_mem_map *src, size_t src_len, + struct user_mem_map newmap[2], uint64_t vaddr, uint64_t len) +{ + struct user_mem_map *src_first = &src[0]; + struct user_mem_map *src_last = &src[src_len - 1]; + struct user_mem_map *dst_first = &newmap[0]; + /* we can get at most two new segments */ + struct user_mem_map *dst_last = &newmap[1]; + uint64_t first_off = vaddr - src_first->addr; + uint64_t last_off = (src_last->addr + src_last->len) - (vaddr + len); + int newmap_len = 0; + + if (first_off != 0) { + dst_first->addr = src_first->addr; + dst_first->iova = src_first->iova; + dst_first->len = first_off; + dst_first->chunk = src_first->chunk; + + newmap_len++; + } + if (last_off != 0) { + /* if we had start offset, we have two segments */ + struct user_mem_map *last = + first_off == 0 ? 
dst_first : dst_last; + last->addr = (src_last->addr + src_last->len) - last_off; + last->iova = (src_last->iova + src_last->len) - last_off; + last->len = last_off; + last->chunk = src_last->chunk; + + newmap_len++; + } + return newmap_len; +} + +/* erase certain maps from the list */ static void -adjust_map(struct user_mem_map *src, struct user_mem_map *end, - uint64_t remove_va_start, uint64_t remove_len) -{ - /* if va start is same as start address, we're simply moving start */ - if (remove_va_start == src->addr) { - src->addr += remove_len; - src->iova += remove_len; - src->len -= remove_len; - } else if (remove_va_start + remove_len == src->addr + src->len) { - /* we're shrinking mapping from the end */ - src->len -= remove_len; - } else { - /* we're blowing a hole in the middle */ - struct user_mem_map tmp; - uint64_t total_len = src->len; +delete_maps(struct user_mem_maps *user_mem_maps, struct user_mem_map *del_maps, + size_t n_del) +{ + int i; + size_t j; + + for (i = 0, j = 0; i < VFIO_MAX_USER_MEM_MAPS && j < n_del; i++) { + struct user_mem_map *left = &user_mem_maps->maps[i]; + struct user_mem_map *right =
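To make the new bookkeeping easier to follow outside of EAL, here is a simplified, standalone sketch of the computation process_maps() performs: given one tracked mapping and a removal range that lies inside it, it returns the zero, one or two pieces that remain. The struct and function names are local to this example, and it assumes the removal range is chunk-aligned, as the real code enforces.

    #include <stdint.h>

    struct mem_map { uint64_t addr, iova, len, chunk; };

    /* Return how many pieces (0..2) of *src survive removing [vaddr, vaddr + len). */
    static int
    split_map(const struct mem_map *src, uint64_t vaddr, uint64_t len,
              struct mem_map out[2])
    {
        uint64_t first_off = vaddr - src->addr;
        uint64_t last_off = (src->addr + src->len) - (vaddr + len);
        int n = 0;

        if (first_off != 0) {
            /* Piece in front of the removed range keeps the original start. */
            out[n].addr = src->addr;
            out[n].iova = src->iova;
            out[n].len = first_off;
            out[n].chunk = src->chunk;
            n++;
        }
        if (last_off != 0) {
            /* Piece behind the removed range ends where the original ended. */
            out[n].addr = (src->addr + src->len) - last_off;
            out[n].iova = (src->iova + src->len) - last_off;
            out[n].len = last_off;
            out[n].chunk = src->chunk;
            n++;
        }
        return n;
    }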
[dpdk-dev] [PATCH v3 2/2] vhost: enable IOMMU for async vhost
The use of IOMMU has many advantages, such as isolation and address translation. This patch extends the capbility of DMA engine to use IOMMU if the DMA engine is bound to vfio. When set memory table, the guest memory will be mapped into the default container of DPDK. Signed-off-by: Xuan Ding --- lib/vhost/vhost.h | 4 ++ lib/vhost/vhost_user.c | 112 - 2 files changed, 114 insertions(+), 2 deletions(-) diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h index 89a31e4ca8..bc5695e899 100644 --- a/lib/vhost/vhost.h +++ b/lib/vhost/vhost.h @@ -370,6 +370,10 @@ struct virtio_net { int16_t broadcast_rarp; uint32_tnr_vring; int async_copy; + + /* Record the dma map status for each region. */ + bool*async_map_status; + int extbuf; int linearbuf; struct vhost_virtqueue *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2]; diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index 29a4c9af60..3990e9b057 100644 --- a/lib/vhost/vhost_user.c +++ b/lib/vhost/vhost_user.c @@ -45,6 +45,8 @@ #include #include #include +#include +#include #include "iotlb.h" #include "vhost.h" @@ -141,6 +143,63 @@ get_blk_size(int fd) return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize; } +static int +async_dma_map(struct rte_vhost_mem_region *region, bool *dma_map_success, bool do_map) +{ + uint64_t host_iova; + int ret = 0; + + host_iova = rte_mem_virt2iova((void *)(uintptr_t)region->host_user_addr); + if (do_map) { + /* Add mapped region into the default container of DPDK. */ + ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD, +region->host_user_addr, +host_iova, +region->size); + *dma_map_success = ret == 0; + + if (ret) { + /* +* DMA device may bind with kernel driver, in this case, +* we don't need to program IOMMU manually. However, if no +* device is bound with vfio/uio in DPDK, and vfio kernel +* module is loaded, the API will still be called and return +* with ENODEV/ENOSUP. +* +* DPDK VFIO only returns ENODEV/ENOSUP in very similar +* situations(VFIO either unsupported, or supported +* but no devices found). Either way, no mappings could be +* performed. We treat it as normal case in async path. +*/ + if (rte_errno == ENODEV && rte_errno == ENOTSUP) { + return 0; + } else { + VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n"); + return ret; + } + } + + } else { + /* No need to do vfio unmap if the map failed. */ + if (!*dma_map_success) + return 0; + + /* Remove mapped region from the default container of DPDK. */ + ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD, + region->host_user_addr, + host_iova, + region->size); + if (ret) { + VHOST_LOG_CONFIG(ERR, "DMA engine unmap failed\n"); + return ret; + } + /* Clear the flag once the unmap succeeds. */ + *dma_map_success = 0; + } + + return ret; +} + static void free_mem_region(struct virtio_net *dev) { @@ -153,6 +212,9 @@ free_mem_region(struct virtio_net *dev) for (i = 0; i < dev->mem->nregions; i++) { reg = &dev->mem->regions[i]; if (reg->host_user_addr) { + if (dev->async_copy && rte_vfio_is_enabled("vfio")) + async_dma_map(reg, &dev->async_map_status[i], false); + munmap(reg->mmap_addr, reg->mmap_size); close(reg->fd); } @@ -203,6 +265,11 @@ vhost_backend_cleanup(struct virtio_net *dev) } dev->postcopy_listening = 0; + + if (dev->async_map_status) { + rte_free(dev->async_map_status); + dev->async_map_status = NULL; + } } static void @@ -621,6 +688,17 @@ numa_realloc(struct virtio_net *dev, int index) } dev->mem = mem; + if (dev->async_copy && rte_vfio_is_
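As a side note on the ENODEV/ENOTSUP filtering described in the comment above: rte_errno can only hold one of those values at a time, so a hedged sketch of the intended check combines them with ||. The helper name below is an assumption for illustration only.

    #include <errno.h>
    #include <rte_errno.h>

    /* Illustration: decide whether a failed container DMA map is benign. */
    static int
    dma_map_errno_is_benign(int ret)
    {
        if (ret == 0)
            return 1;
        /*
         * VFIO may be unsupported, or supported but with no device bound to
         * it in DPDK; in both cases no mapping could have been performed and
         * the async data path can continue without IOMMU programming.
         */
        return rte_errno == ENODEV || rte_errno == ENOTSUP;
    }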
[dpdk-dev] [PATCH v4 0/2] support IOMMU for DMA device
This series enables DMA devices to use vfio in async vhost. The first patch extends the current vfio DMA mapping API so that one of two adjacent mappings can be unmapped even when the platform does not support partial unmapping. The second patch adds the IOMMU programming for guest memory in async vhost. v4: * Fix a format issue. v3: * Move the async_map_status flag to the virtio_net structure to avoid breaking the ABI. v2: * Add rte_errno filtering for devices bound to a kernel driver. * Add a flag to check the status of region mapping. * Fix one typo. Xuan Ding (2): vfio: allow partially unmapping adjacent memory vhost: enable IOMMU for async vhost lib/eal/linux/eal_vfio.c | 338 ++- lib/vhost/vhost.h | 4 + lib/vhost/vhost_user.c | 112 - 3 files changed, 342 insertions(+), 112 deletions(-) -- 2.17.1
[dpdk-dev] [PATCH v4 1/2] vfio: allow partially unmapping adjacent memory
Currently, if we map a memory area A, then map a separate memory area B that by coincidence happens to be adjacent to A, current implementation will merge these two segments into one, and if partial unmapping is not supported, these segments will then be only allowed to be unmapped in one go. In other words, given segments A and B that are adjacent, it is currently not possible to map A, then map B, then unmap A. Fix this by adding a notion of "chunk size", which will allow subdividing segments into equally sized segments whenever we are dealing with an IOMMU that does not support partial unmapping. With this change, we will still be able to merge adjacent segments, but only if they are of the same size. If we keep with our above example, adjacent segments A and B will be stored as separate segments if they are of different sizes. Signed-off-by: Anatoly Burakov Signed-off-by: Xuan Ding --- lib/eal/linux/eal_vfio.c | 338 ++- 1 file changed, 228 insertions(+), 110 deletions(-) diff --git a/lib/eal/linux/eal_vfio.c b/lib/eal/linux/eal_vfio.c index 25add2fa5d..657c89ca58 100644 --- a/lib/eal/linux/eal_vfio.c +++ b/lib/eal/linux/eal_vfio.c @@ -31,9 +31,10 @@ */ #define VFIO_MAX_USER_MEM_MAPS 256 struct user_mem_map { - uint64_t addr; - uint64_t iova; - uint64_t len; + uint64_t addr; /**< start VA */ + uint64_t iova; /**< start IOVA */ + uint64_t len; /**< total length of the mapping */ + uint64_t chunk; /**< this mapping can be split in chunks of this size */ }; struct user_mem_maps { @@ -95,7 +96,8 @@ static const struct vfio_iommu_type iommu_types[] = { static int is_null_map(const struct user_mem_map *map) { - return map->addr == 0 && map->iova == 0 && map->len == 0; + return map->addr == 0 && map->iova == 0 && + map->len == 0 && map->chunk == 0; } /* we may need to merge user mem maps together in case of user mapping/unmapping @@ -129,41 +131,90 @@ user_mem_map_cmp(const void *a, const void *b) if (umm_a->len > umm_b->len) return 1; + if (umm_a->chunk < umm_b->chunk) + return -1; + if (umm_a->chunk > umm_b->chunk) + return 1; + return 0; } -/* adjust user map entry. this may result in shortening of existing map, or in - * splitting existing map in two pieces. +/* + * Take in an address range and list of current mappings, and produce a list of + * mappings that will be kept. */ +static int +process_maps(struct user_mem_map *src, size_t src_len, + struct user_mem_map newmap[2], uint64_t vaddr, uint64_t len) +{ + struct user_mem_map *src_first = &src[0]; + struct user_mem_map *src_last = &src[src_len - 1]; + struct user_mem_map *dst_first = &newmap[0]; + /* we can get at most two new segments */ + struct user_mem_map *dst_last = &newmap[1]; + uint64_t first_off = vaddr - src_first->addr; + uint64_t last_off = (src_last->addr + src_last->len) - (vaddr + len); + int newmap_len = 0; + + if (first_off != 0) { + dst_first->addr = src_first->addr; + dst_first->iova = src_first->iova; + dst_first->len = first_off; + dst_first->chunk = src_first->chunk; + + newmap_len++; + } + if (last_off != 0) { + /* if we had start offset, we have two segments */ + struct user_mem_map *last = + first_off == 0 ? 
dst_first : dst_last; + last->addr = (src_last->addr + src_last->len) - last_off; + last->iova = (src_last->iova + src_last->len) - last_off; + last->len = last_off; + last->chunk = src_last->chunk; + + newmap_len++; + } + return newmap_len; +} + +/* erase certain maps from the list */ static void -adjust_map(struct user_mem_map *src, struct user_mem_map *end, - uint64_t remove_va_start, uint64_t remove_len) -{ - /* if va start is same as start address, we're simply moving start */ - if (remove_va_start == src->addr) { - src->addr += remove_len; - src->iova += remove_len; - src->len -= remove_len; - } else if (remove_va_start + remove_len == src->addr + src->len) { - /* we're shrinking mapping from the end */ - src->len -= remove_len; - } else { - /* we're blowing a hole in the middle */ - struct user_mem_map tmp; - uint64_t total_len = src->len; +delete_maps(struct user_mem_maps *user_mem_maps, struct user_mem_map *del_maps, + size_t n_del) +{ + int i; + size_t j; + + for (i = 0, j = 0; i < VFIO_MAX_USER_MEM_MAPS && j < n_del; i++) { + struct user_mem_map *left = &user_mem_maps->maps[i]; + struct user_mem_map *right =
[dpdk-dev] [PATCH v4 2/2] vhost: enable IOMMU for async vhost
The use of IOMMU has many advantages, such as isolation and address translation. This patch extends the capbility of DMA engine to use IOMMU if the DMA engine is bound to vfio. When set memory table, the guest memory will be mapped into the default container of DPDK. Signed-off-by: Xuan Ding --- lib/vhost/vhost.h | 4 ++ lib/vhost/vhost_user.c | 112 - 2 files changed, 114 insertions(+), 2 deletions(-) diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h index 89a31e4ca8..bc5695e899 100644 --- a/lib/vhost/vhost.h +++ b/lib/vhost/vhost.h @@ -370,6 +370,10 @@ struct virtio_net { int16_t broadcast_rarp; uint32_tnr_vring; int async_copy; + + /* Record the dma map status for each region. */ + bool*async_map_status; + int extbuf; int linearbuf; struct vhost_virtqueue *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2]; diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index 29a4c9af60..10104be18f 100644 --- a/lib/vhost/vhost_user.c +++ b/lib/vhost/vhost_user.c @@ -45,6 +45,8 @@ #include #include #include +#include +#include #include "iotlb.h" #include "vhost.h" @@ -141,6 +143,63 @@ get_blk_size(int fd) return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize; } +static int +async_dma_map(struct rte_vhost_mem_region *region, bool *dma_map_success, bool do_map) +{ + uint64_t host_iova; + int ret = 0; + + host_iova = rte_mem_virt2iova((void *)(uintptr_t)region->host_user_addr); + if (do_map) { + /* Add mapped region into the default container of DPDK. */ + ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD, +region->host_user_addr, +host_iova, +region->size); + *dma_map_success = ret == 0; + + if (ret) { + /* +* DMA device may bind with kernel driver, in this case, +* we don't need to program IOMMU manually. However, if no +* device is bound with vfio/uio in DPDK, and vfio kernel +* module is loaded, the API will still be called and return +* with ENODEV/ENOSUP. +* +* DPDK VFIO only returns ENODEV/ENOSUP in very similar +* situations(VFIO either unsupported, or supported +* but no devices found). Either way, no mappings could be +* performed. We treat it as normal case in async path. +*/ + if (rte_errno == ENODEV && rte_errno == ENOTSUP) + return 0; + + VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n"); + return ret; + + } + + } else { + /* No need to do vfio unmap if the map failed. */ + if (!*dma_map_success) + return 0; + + /* Remove mapped region from the default container of DPDK. */ + ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD, + region->host_user_addr, + host_iova, + region->size); + if (ret) { + VHOST_LOG_CONFIG(ERR, "DMA engine unmap failed\n"); + return ret; + } + /* Clear the flag once the unmap succeeds. */ + *dma_map_success = 0; + } + + return ret; +} + static void free_mem_region(struct virtio_net *dev) { @@ -153,6 +212,9 @@ free_mem_region(struct virtio_net *dev) for (i = 0; i < dev->mem->nregions; i++) { reg = &dev->mem->regions[i]; if (reg->host_user_addr) { + if (dev->async_copy && rte_vfio_is_enabled("vfio")) + async_dma_map(reg, &dev->async_map_status[i], false); + munmap(reg->mmap_addr, reg->mmap_size); close(reg->fd); } @@ -203,6 +265,11 @@ vhost_backend_cleanup(struct virtio_net *dev) } dev->postcopy_listening = 0; + + if (dev->async_map_status) { + rte_free(dev->async_map_status); + dev->async_map_status = NULL; + } } static void @@ -621,6 +688,17 @@ numa_realloc(struct virtio_net *dev, int index) } dev->mem = mem; + if (dev->async_copy && rte_vfio_is_enabled("vfio")) { + dev->async_map_status = rte_zmalloc_so
Re: [dpdk-dev] [PATCH 0/5] kvargs: promote or remove experimental api
On Fri, 2021-09-24 at 17:57 +0200, Olivier Matz wrote: > This patchset promotes 2 functions rte_kvargs_parse_delim() and > rte_kvargs_get() as stable. > > It also replaces rte_kvargs_strcmp() by a new one > rte_kvargs_get_with_value(), which is easier to use. > > Olivier Matz (5): > kvargs: promote delimited parsing as stable > kvargs: promote get from key as stable > kvargs: new function to get from key and value > kvargs: remove experimental function to compare string > kvargs: fix comments style > > drivers/bus/auxiliary/auxiliary_params.c | 9 ++-- > drivers/bus/vdev/vdev_params.c | 13 +- > lib/kvargs/rte_kvargs.c | 32 +++-- > lib/kvargs/rte_kvargs.h | 59 > lib/kvargs/version.map | 7 +-- > 5 files changed, 57 insertions(+), 63 deletions(-) > The whole series looks good to me, the new function that match key and value directly looks more efficient, thanks! Reviewed-by: Xueming Li
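For context, a small usage sketch of the getters discussed in this series; the devargs string and key names are made up for the example.

    #include <stdio.h>
    #include <rte_kvargs.h>

    static void
    kvargs_example(void)
    {
        /* Hypothetical devargs string. */
        struct rte_kvargs *kvlist = rte_kvargs_parse("class=eth,mode=fast", NULL);
        const char *mode;

        if (kvlist == NULL)
            return;

        /* Value of the first "mode" pair, or NULL if the key is absent. */
        mode = rte_kvargs_get(kvlist, "mode");

        /* Non-NULL only if a pair exactly matching "class=eth" is present. */
        if (rte_kvargs_get_with_value(kvlist, "class", "eth") != NULL)
            printf("ethdev class requested, mode=%s\n", mode ? mode : "default");

        rte_kvargs_free(kvlist);
    }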
Re: [dpdk-dev] [PATCH v1] windows/netuio: add Intel 82599 10GbE VF device ID
On Fri, Sep 24, 2021 at 3:24 PM Kadam, Pallavi wrote: > > Hi William, > > On 9/22/2021 2:19 PM, William Tu wrote: > > Add Intel 82599 10GbE Ethernet adapter VF device IDs to > netuio inf file in order to enable them on Windows. > > Signed-off-by: William Tu > --- > windows/netuio/netuio.inf | 2 ++ > 1 file changed, 2 insertions(+) > > diff --git a/windows/netuio/netuio.inf b/windows/netuio/netuio.inf > index d166868..6b45ab0 100644 > --- a/windows/netuio/netuio.inf > +++ b/windows/netuio/netuio.inf > @@ -43,6 +43,7 @@ HKR,,Icon,,-5 > %Intel.F101F.Description%=netuio_Device, PCI\VEN_8086&DEV_101F > %Intel.F104F.Description%=netuio_Device, PCI\VEN_8086&DEV_104F > %Intel.F104E.Description%=netuio_Device, PCI\VEN_8086&DEV_104E > +%Intel.F10ED.Description%=netuio_Device, PCI\VEN_8086&DEV_10ED > %Intel.F1591.Description%=netuio_Device, PCI\VEN_8086&DEV_1591 > %Intel.F1592.Description%=netuio_Device, PCI\VEN_8086&DEV_1592 > %Intel.F1593.Description%=netuio_Device, PCI\VEN_8086&DEV_1593 > @@ -108,6 +109,7 @@ Intel.F15FF.Description = "Intel(R) Ethernet Controller > X710 for 10GBASE-T" > Intel.F101F.Description = "Intel(R) Ethernet Controller V710 for 5GBASE-T" > Intel.F104F.Description = "Intel(R) Ethernet Controller X710 for 10 Gigabit > backplane" > Intel.F104E.Description = "Intel(R) Ethernet Controller X710 for 10 Gigabit > SFP+" > +Intel.F10ED.Description = "Intel(R) Ethernet Controller 82599 10GbE Virtual > Function" > > As per NDIS INF, please use following string: > "Intel(R) 82599 Virtual Function" > This is decided by marketing team and cannot be changed. > Thank you, I will send v2 and use the above string. William
Re: [dpdk-dev] RTE_REGISTER_BUS does not work on Windows
I've resolved the issue by adding -Wl,/WHOLEARCHIVE at libdpdk.pc Thanks William On Fri, Sep 24, 2021 at 7:41 AM William Tu wrote: > > Hi, > > I continued my experiment and observed below > 1. for dpdk-testpmd.exe which works fine, the functions in driver/bus/pci/* > are there in the binary, ex: > PS C:\dpdk\build\app> dumpbin.exe /ALL .\dpdk-testpmd.exe | grep rte_pci_set_b > 0001D1F0 0068DB40 0068DC32 0073BDEC rte_pci_set_bus_master > So I think that's why > RTE_REGISTER_BUS(pci, rte_pci_bus.bus) works in dpdk-testpmd.exe > > 2. for my own application binary which static links DPDK library using > pkgconfig, > PS C:\example\build> dumpbin.exe /ALL .\demo.exe | grep rte_pci_set > > and it only has symbols from lib/eal/*, ex: > PS C:\example\build> dumpbin.exe /ALL .\demo.exe | grep rte_bus_re > 0D14 000152D0 0001559E 00095CC8 rte_bus_register > > Apparently the pci bus constructor function doesn't exist in my binary. > I'm going to try using meson subproject > https://mesonbuild.com/Subprojects.html > > Thanks, > William > > On Thu, Sep 23, 2021 at 10:47 AM William Tu wrote: > > > > Hi, > > > > I'm trying to run OVS-DPDK on Windows (building OVS binaries and > > statically link DPDK windows library), and it doesn't work due to > > rte_bus_scan() find no available devices. (However, when using > > dpdk-testpmd.exe, it finds the device successfully) > > > > I realized that the PCI bus driver calls RTE_REGISTER_BUS to > > register itself, and it's a macro creating the businitfn_pci, and calls > > rte_bus_register. But somehow on my system, it does not invoke > > so actually the 'rte_bus_list' is empty. And I guess this is due to > > "constructor" function not working correctly when I static link DPDK. > > > > When linking DPDK using gcc, OVS passes the linker option '--whole-archive'. > > https://github.com/openvswitch/ovs/blob/master/acinclude.m4#L509 > > > > I tried adding -Wl,-force_load, or -Wl,/WHOLEARCHIVE at libdpdk.pc. > > So far I still couldn't get it to work. Any suggestions or comments are > > welcome! > > > > Thank you > > William
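For readers following the thread, the bus registration relies on constructor functions, and the sketch below (a simplified stand-in, not the actual RTE_REGISTER_BUS expansion) shows why the object file containing the constructor must be pulled into the final binary: with static libraries, the linker drops unreferenced objects unless --whole-archive (GNU ld) or /WHOLEARCHIVE (MSVC link.exe) is used.

    #include <stdio.h>

    static int bus_count;  /* simplified stand-in for rte_bus_list */

    static void
    bus_register(const char *name)
    {
        bus_count++;
        printf("registered bus %s\n", name);
    }

    /*
     * Runs before main(), but only if this object file is actually linked in.
     * When the application never references a symbol from this object, a
     * static link silently drops it and the bus list stays empty, which is
     * the symptom described above.
     */
    __attribute__((constructor))
    static void
    businitfn_example(void)
    {
        bus_register("example");
    }

    int
    main(void)
    {
        printf("%d bus(es) registered\n", bus_count);
        return 0;
    }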
[dpdk-dev] [PATCH] net/mlx5: fix xstats get reinitialization
The mlx5_xstats_get gets the device extended statistics. In this function the driver may reinitialize the structures that are used to read device counters. In case of reinitialization, the number of counters may change, which wouldn't be taken into account by the get API callback and can cause a segmentation fault. In case of insufficient supplied stats table size, ex. zero to query the number of extended stats, reinitialization may never happen and the returned stats number, that is used for subsequent stats getting, will not be sufficient too. This issue is fixed by getting and allocating the counters size after the reinitialization. Fixes: 1a611fdaf6ec ("net/mlx5: support missing counter in extended statistics") Fixes: a4193ae3bc4f ("net/mlx5: support extended statistics") Cc: sta...@dpdk.org Signed-off-by: Vladislav Grishenko --- drivers/net/mlx5/mlx5_stats.c | 35 +-- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/drivers/net/mlx5/mlx5_stats.c b/drivers/net/mlx5/mlx5_stats.c index ae2f5668a7..7dd7724b05 100644 --- a/drivers/net/mlx5/mlx5_stats.c +++ b/drivers/net/mlx5/mlx5_stats.c @@ -39,23 +39,37 @@ mlx5_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *stats, unsigned int n) { struct mlx5_priv *priv = dev->data->dev_private; - unsigned int i; - uint64_t counters[n]; struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl; - uint16_t mlx5_stats_n = xstats_ctrl->mlx5_stats_n; + uint16_t mlx5_stats_n; + int stats_n; + + stats_n = mlx5_os_get_stats_n(dev); + if (stats_n < 0) + return stats_n; + if (xstats_ctrl->stats_n != stats_n) + mlx5_os_stats_init(dev); + mlx5_stats_n = xstats_ctrl->mlx5_stats_n; if (n >= mlx5_stats_n && stats) { - int stats_n; + uint64_t *counters; + unsigned int i; int ret; - stats_n = mlx5_os_get_stats_n(dev); - if (stats_n < 0) - return stats_n; - if (xstats_ctrl->stats_n != stats_n) - mlx5_os_stats_init(dev); + counters = mlx5_malloc(MLX5_MEM_SYS, sizeof(*counters) * + mlx5_stats_n, 0, + SOCKET_ID_ANY); + if (counters == NULL) { + DRV_LOG(WARNING, "port %u unable to allocate " + "memory for xstats counters", + dev->data->port_id); + rte_errno = ENOMEM; + return -rte_errno; + } ret = mlx5_os_read_dev_counters(dev, counters); - if (ret) + if (ret) { + mlx5_free(counters); return ret; + } for (i = 0; i != mlx5_stats_n; ++i) { stats[i].id = i; if (xstats_ctrl->info[i].dev) { @@ -76,6 +90,7 @@ mlx5_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *stats, (counters[i] - xstats_ctrl->base[i]); } } + mlx5_free(counters); } mlx5_stats_n = mlx5_txpp_xstats_get(dev, stats, n, mlx5_stats_n); return mlx5_stats_n; -- 2.17.1
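For context, the driver callback above is reached through the usual two-call pattern applications use with the ethdev xstats API: query the required count first (here with a zero-sized table), then allocate and fetch. A hedged application-side sketch, with error handling trimmed:

    #include <inttypes.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <rte_ethdev.h>

    static void
    dump_xstats(uint16_t port_id)
    {
        struct rte_eth_xstat *stats = NULL;
        struct rte_eth_xstat_name *names = NULL;
        int i, n;

        /* With n == 0 the call only reports how many xstats exist. */
        n = rte_eth_xstats_get(port_id, NULL, 0);
        if (n <= 0)
            return;

        stats = calloc(n, sizeof(*stats));
        names = calloc(n, sizeof(*names));
        if (stats == NULL || names == NULL)
            goto out;

        /* Second pass fills the tables; a changed count means a retry is needed. */
        if (rte_eth_xstats_get(port_id, stats, n) != n ||
            rte_eth_xstats_get_names(port_id, names, n) != n)
            goto out;

        for (i = 0; i < n; i++)
            printf("%s: %" PRIu64 "\n", names[i].name, stats[i].value);
    out:
        free(stats);
        free(names);
    }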
[dpdk-dev] [PATCH v5 0/5] Add lookaside IPsec tests
Add lookaside IPsec functional tests. Known vector tests and combined mode framework is added. Known vectors are outbound vectors based on https://datatracker.ietf.org/doc/html/draft-mcgrew-gcm-test-01 The vectors are updated to have sequence number as 1 & L4 checksum computed correctly. And they have following properties, 1. ESP 2. Tunnel mode 3. IPv4 4. IPv4 tunnel Known vector tests for inbound operation would generate test vectors by reversing outbound known vectors. The input_text would become encrypted packet and output_text would be the plain packet. Tests would then validate the operation by comparing against plain packet. Combined mode tests are used to test all IPsec features against all ciphers supported by the PMD. The framework is introduced to avoid testing with any specific algo, thereby making it mandatory to be supported. Also, testing with all supported combinations will help with increasing coverage as well. Four test cases use combined mode, 1. Display algo coverage and basic in + out tests 2. Negative test for ICV corruption 3. IV generation 4. UDP encapsulation IV generation test case compares IV generated for a batch of packets and returns failure if IV is repeated. Upcoming additions, 1. AES-CBC-SHA1-HMAC known vectors & combined mode 2. IPv6 3. Transport 4. Mixed mode (IPv4-in-IPv6 etc, all combinations) Tested with following PMDs 1. crypto_octeontx2 2. crypto_cn10k 3. crypto_cn9k Changes in v5: - Fixed lack of return check with dev_configure_and_start() (comment from Akhil) - Adjusted patch title and description (comment from Akhil) Changes in v4: - Fixed lack of device stop in case capability check fails (comment from Ciara) - Rebased and updated release notes Changes in v3 - Added UDP encapsulation tests Changes in v2 - Dropped outbound known vector tests as lookaside protocol would require IV generated by PMD. The tests would be introduced with spec change to allow user to specify IV. - Added IV generation tests - Minor fixes in combined mode tests to handle multiple packets Anoob Joseph (2): test/crypto: add lookaside IPsec cases test/crypto: add combined mode IPsec cases Tejasree Kondoj (3): test/crypto: add lookaside IPsec ICV corrupt test case test/crypto: add IV gen cases for IPsec test/crypto: add UDP encapsulated IPsec test cases app/test/meson.build | 1 + app/test/test.h| 6 + app/test/test_cryptodev.c | 355 ++ app/test/test_cryptodev_security_ipsec.c | 401 + app/test/test_cryptodev_security_ipsec.h | 119 ++ .../test_cryptodev_security_ipsec_test_vectors.h | 321 + doc/guides/rel_notes/release_21_11.rst | 7 + 7 files changed, 1210 insertions(+) create mode 100644 app/test/test_cryptodev_security_ipsec.c create mode 100644 app/test/test_cryptodev_security_ipsec.h create mode 100644 app/test/test_cryptodev_security_ipsec_test_vectors.h -- 2.7.4
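As a rough illustration of how the inbound vectors described above are derived from the outbound ones (the real helper is test_ipsec_td_in_from_out() in patch 1 and also flips the SA direction; the structure here is reduced to the two fields that matter for the idea):

    #include <stdint.h>

    struct text { uint8_t data[1024]; unsigned int len; };
    struct test_data { struct text input_text, output_text; };

    static void
    reverse_outbound_to_inbound(const struct test_data *outb, struct test_data *inb)
    {
        /* The encrypted packet produced outbound becomes the inbound input... */
        inb->input_text = outb->output_text;
        /* ...and the original plain packet becomes the expected inbound output. */
        inb->output_text = outb->input_text;
    }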
[dpdk-dev] [PATCH v5 1/5] test/crypto: add lookaside IPsec cases
Added test case for lookaside IPsec. Inbound known vector tests are added. Cipher list: AES-GCM 128, 192 & 256 Signed-off-by: Anoob Joseph Signed-off-by: Tejasree Kondoj Acked-by: Akhil Goyal Acked-by: Ciara Power Acked-by: Hemant Agrawal --- app/test/meson.build | 1 + app/test/test.h| 6 + app/test/test_cryptodev.c | 236 +++ app/test/test_cryptodev_security_ipsec.c | 212 ++ app/test/test_cryptodev_security_ipsec.h | 66 + .../test_cryptodev_security_ipsec_test_vectors.h | 321 + doc/guides/rel_notes/release_21_11.rst | 4 + 7 files changed, 846 insertions(+) create mode 100644 app/test/test_cryptodev_security_ipsec.c create mode 100644 app/test/test_cryptodev_security_ipsec.h create mode 100644 app/test/test_cryptodev_security_ipsec_test_vectors.h diff --git a/app/test/meson.build b/app/test/meson.build index a761168..f144d8b 100644 --- a/app/test/meson.build +++ b/app/test/meson.build @@ -38,6 +38,7 @@ test_sources = files( 'test_cryptodev.c', 'test_cryptodev_asym.c', 'test_cryptodev_blockcipher.c', +'test_cryptodev_security_ipsec.c', 'test_cryptodev_security_pdcp.c', 'test_cycles.c', 'test_debug.c', diff --git a/app/test/test.h b/app/test/test.h index c3b2a87..7115edf 100644 --- a/app/test/test.h +++ b/app/test/test.h @@ -124,6 +124,12 @@ struct unit_test_case { #define TEST_CASE_WITH_DATA(setup, teardown, testcase, data) \ { setup, teardown, NULL, testcase, #testcase, 1, data } +#define TEST_CASE_NAMED_ST(name, setup, teardown, testcase) \ + { setup, teardown, NULL, testcase, name, 1, NULL } + +#define TEST_CASE_NAMED_WITH_DATA(name, setup, teardown, testcase, data) \ + { setup, teardown, NULL, testcase, name, 1, data } + #define TEST_CASE_DISABLED(fn) { NULL, NULL, fn, NULL, #fn, 0, NULL } #define TEST_CASE_ST_DISABLED(setup, teardown, testcase) \ diff --git a/app/test/test_cryptodev.c b/app/test/test_cryptodev.c index d19482c..f7d429b 100644 --- a/app/test/test_cryptodev.c +++ b/app/test/test_cryptodev.c @@ -16,6 +16,7 @@ #include #include +#include #include #ifdef RTE_CRYPTO_SCHEDULER @@ -41,6 +42,8 @@ #include "test_cryptodev_hmac_test_vectors.h" #include "test_cryptodev_mixed_test_vectors.h" #ifdef RTE_LIB_SECURITY +#include "test_cryptodev_security_ipsec.h" +#include "test_cryptodev_security_ipsec_test_vectors.h" #include "test_cryptodev_security_pdcp_test_vectors.h" #include "test_cryptodev_security_pdcp_sdap_test_vectors.h" #include "test_cryptodev_security_pdcp_test_func.h" @@ -123,6 +126,13 @@ test_AES_CBC_HMAC_SHA512_decrypt_perform(struct rte_cryptodev_sym_session *sess, const uint8_t *digest, const uint8_t *iv); +static int +security_proto_supported(enum rte_security_session_action_type action, + enum rte_security_session_protocol proto); + +static int +dev_configure_and_start(uint64_t ff_disable); + static struct rte_mbuf * setup_test_string(struct rte_mempool *mpool, const char *string, size_t len, uint8_t blocksize) @@ -753,6 +763,47 @@ crypto_gen_testsuite_setup(void) #ifdef RTE_LIB_SECURITY static int +ipsec_proto_testsuite_setup(void) +{ + struct crypto_testsuite_params *ts_params = &testsuite_params; + struct crypto_unittest_params *ut_params = &unittest_params; + struct rte_cryptodev_info dev_info; + int ret = 0; + + rte_cryptodev_info_get(ts_params->valid_devs[0], &dev_info); + + if (!(dev_info.feature_flags & RTE_CRYPTODEV_FF_SECURITY)) { + RTE_LOG(INFO, USER1, "Feature flag requirements for IPsec Proto " + "testsuite not met\n"); + return TEST_SKIPPED; + } + + /* Reconfigure to enable security */ + ret = dev_configure_and_start(0); + if (ret != TEST_SUCCESS) + 
return ret; + + /* Set action type */ + ut_params->type = RTE_SECURITY_ACTION_TYPE_LOOKASIDE_PROTOCOL; + + if (security_proto_supported( + RTE_SECURITY_ACTION_TYPE_LOOKASIDE_PROTOCOL, + RTE_SECURITY_PROTOCOL_IPSEC) < 0) { + RTE_LOG(INFO, USER1, "Capability requirements for IPsec Proto " + "test not met\n"); + ret = TEST_SKIPPED; + } + + /* +* Stop the device. Device would be started again by individual test +* case setup routine. +*/ + rte_cryptodev_stop(ts_params->valid_devs[0]); + + return ret; +} + +static int pdcp_proto_testsuite_setup(void) { struct crypto_testsuite_params *ts_params = &testsuite_params; @@ -8854,6 +8905,170 @@ test_PDCP_SDAP_PROTO_decap_all(void) } static int +test_ipsec_proto_process(const struct ipsec_test_d
[dpdk-dev] [PATCH v5 2/5] test/crypto: add combined mode IPsec cases
Add framework to test IPsec features with all supported combinations of ciphers. Combined mode tests are used to test all IPsec features against all ciphers supported by the PMD. The framework is introduced to avoid testing with any specific algo, thereby making it mandatory to be supported. Also, testing with all supported combinations will help with increasing coverage as well. The tests would first do IPsec encapsulation and do sanity checks. Based on flags, packet would be updated or additional checks are done, followed by IPsec decapsulation. Since the encrypted packet is generated by the test, known vectors are not required. Signed-off-by: Anoob Joseph Signed-off-by: Tejasree Kondoj Acked-by: Akhil Goyal Acked-by: Ciara Power --- app/test/test_cryptodev.c| 73 +++-- app/test/test_cryptodev_security_ipsec.c | 107 +-- app/test/test_cryptodev_security_ipsec.h | 52 ++- 3 files changed, 223 insertions(+), 9 deletions(-) diff --git a/app/test/test_cryptodev.c b/app/test/test_cryptodev.c index f7d429b..3eacc66 100644 --- a/app/test/test_cryptodev.c +++ b/app/test/test_cryptodev.c @@ -8908,7 +8908,8 @@ static int test_ipsec_proto_process(const struct ipsec_test_data td[], struct ipsec_test_data res_d[], int nb_td, -bool silent) +bool silent, +const struct ipsec_test_flags *flags) { struct crypto_testsuite_params *ts_params = &testsuite_params; struct crypto_unittest_params *ut_params = &unittest_params; @@ -9025,7 +9026,7 @@ test_ipsec_proto_process(const struct ipsec_test_data td[], /* Process crypto operation */ process_crypto_request(dev_id, ut_params->op); - ret = test_ipsec_status_check(ut_params->op, dir); + ret = test_ipsec_status_check(ut_params->op, flags, dir); if (ret != TEST_SUCCESS) goto crypto_op_free; @@ -9033,7 +9034,7 @@ test_ipsec_proto_process(const struct ipsec_test_data td[], res_d_tmp = &res_d[i]; ret = test_ipsec_post_process(ut_params->ibuf, &td[i], - res_d_tmp, silent); + res_d_tmp, silent, flags); if (ret != TEST_SUCCESS) goto crypto_op_free; @@ -9061,11 +9062,71 @@ test_ipsec_proto_process(const struct ipsec_test_data td[], static int test_ipsec_proto_known_vec_inb(const void *td_outb) { + struct ipsec_test_flags flags; struct ipsec_test_data td_inb; + memset(&flags, 0, sizeof(flags)); + test_ipsec_td_in_from_out(td_outb, &td_inb); - return test_ipsec_proto_process(&td_inb, NULL, 1, false); + return test_ipsec_proto_process(&td_inb, NULL, 1, false, &flags); +} + +static int +test_ipsec_proto_all(const struct ipsec_test_flags *flags) +{ + struct ipsec_test_data td_outb[IPSEC_TEST_PACKETS_MAX]; + struct ipsec_test_data td_inb[IPSEC_TEST_PACKETS_MAX]; + unsigned int i, nb_pkts = 1, pass_cnt = 0; + int ret; + + for (i = 0; i < RTE_DIM(aead_list); i++) { + test_ipsec_td_prepare(&aead_list[i], + NULL, + flags, + td_outb, + nb_pkts); + + ret = test_ipsec_proto_process(td_outb, td_inb, nb_pkts, true, + flags); + if (ret == TEST_SKIPPED) + continue; + + if (ret == TEST_FAILED) + return TEST_FAILED; + + test_ipsec_td_update(td_inb, td_outb, nb_pkts, flags); + + ret = test_ipsec_proto_process(td_inb, NULL, nb_pkts, true, + flags); + if (ret == TEST_SKIPPED) + continue; + + if (ret == TEST_FAILED) + return TEST_FAILED; + + if (flags->display_alg) + test_ipsec_display_alg(&aead_list[i], NULL); + + pass_cnt++; + } + + if (pass_cnt > 0) + return TEST_SUCCESS; + else + return TEST_SKIPPED; +} + +static int +test_ipsec_proto_display_list(const void *data __rte_unused) +{ + struct ipsec_test_flags flags; + + memset(&flags, 0, sizeof(flags)); + + flags.display_alg = true; + + return 
test_ipsec_proto_all(&flags); } static int @@ -13976,6 +14037,10 @@ static struct unit_test_suite ipsec_proto_testsuite = { "Inbound known vector (ESP tunnel mode IPv4 AES-GCM 256)", ut_setup_security, ut_teardown, test_ipsec_prot
[dpdk-dev] [PATCH v5 3/5] test/crypto: add lookaside IPsec ICV corrupt test case
From: Tejasree Kondoj Add negative test to validate IPsec inbound processing failure with ICV corruption. The tests would first do IPsec encapsulation and corrupt ICV of the generated IPsec packet. Then the packet is submitted to IPsec outbound processing for decapsulation. Test case would validate that PMD returns an error in such cases. Signed-off-by: Anoob Joseph Signed-off-by: Tejasree Kondoj Acked-by: Akhil Goyal Acked-by: Ciara Power Acked-by: Hemant Agrawal --- app/test/test_cryptodev.c| 16 app/test/test_cryptodev_security_ipsec.c | 30 -- app/test/test_cryptodev_security_ipsec.h | 1 + doc/guides/rel_notes/release_21_11.rst | 1 + 4 files changed, 38 insertions(+), 10 deletions(-) diff --git a/app/test/test_cryptodev.c b/app/test/test_cryptodev.c index 3eacc66..bfaca1d 100644 --- a/app/test/test_cryptodev.c +++ b/app/test/test_cryptodev.c @@ -9130,6 +9130,18 @@ test_ipsec_proto_display_list(const void *data __rte_unused) } static int +test_ipsec_proto_err_icv_corrupt(const void *data __rte_unused) +{ + struct ipsec_test_flags flags; + + memset(&flags, 0, sizeof(flags)); + + flags.icv_corrupt = true; + + return test_ipsec_proto_all(&flags); +} + +static int test_PDCP_PROTO_all(void) { struct crypto_testsuite_params *ts_params = &testsuite_params; @@ -14041,6 +14053,10 @@ static struct unit_test_suite ipsec_proto_testsuite = { "Combined test alg list", ut_setup_security, ut_teardown, test_ipsec_proto_display_list), + TEST_CASE_NAMED_ST( + "Negative test: ICV corruption", + ut_setup_security, ut_teardown, + test_ipsec_proto_err_icv_corrupt), TEST_CASES_END() /**< NULL terminate unit test array */ } }; diff --git a/app/test/test_cryptodev_security_ipsec.c b/app/test/test_cryptodev_security_ipsec.c index d08e093..aebbe66 100644 --- a/app/test/test_cryptodev_security_ipsec.c +++ b/app/test/test_cryptodev_security_ipsec.c @@ -175,9 +175,12 @@ test_ipsec_td_update(struct ipsec_test_data td_inb[], memcpy(td_inb[i].output_text.data, td_outb[i].input_text.data, td_outb[i].input_text.len); td_inb[i].output_text.len = td_outb->input_text.len; - } - RTE_SET_USED(flags); + if (flags->icv_corrupt) { + int icv_pos = td_inb[i].input_text.len - 4; + td_inb[i].input_text.data[icv_pos] += 1; + } + } } void @@ -217,6 +220,11 @@ test_ipsec_td_verify(struct rte_mbuf *m, const struct ipsec_test_data *td, uint8_t *output_text = rte_pktmbuf_mtod(m, uint8_t *); uint32_t skip, len = rte_pktmbuf_pkt_len(m); + /* For negative tests, no need to do verification */ + if (flags->icv_corrupt && + td->ipsec_xform.direction == RTE_SECURITY_IPSEC_SA_DIR_INGRESS) + return TEST_SUCCESS; + if (len != td->output_text.len) { printf("Output length (%d) not matching with expected (%d)\n", len, td->output_text.len); @@ -241,8 +249,6 @@ test_ipsec_td_verify(struct rte_mbuf *m, const struct ipsec_test_data *td, return TEST_FAILED; } - RTE_SET_USED(flags); - return TEST_SUCCESS; } @@ -299,13 +305,17 @@ test_ipsec_status_check(struct rte_crypto_op *op, { int ret = TEST_SUCCESS; - if (op->status != RTE_CRYPTO_OP_STATUS_SUCCESS) { - printf("Security op processing failed\n"); - ret = TEST_FAILED; + if (dir == RTE_SECURITY_IPSEC_SA_DIR_INGRESS && flags->icv_corrupt) { + if (op->status != RTE_CRYPTO_OP_STATUS_ERROR) { + printf("ICV corruption test case failed\n"); + ret = TEST_FAILED; + } + } else { + if (op->status != RTE_CRYPTO_OP_STATUS_SUCCESS) { + printf("Security op processing failed\n"); + ret = TEST_FAILED; + } } - RTE_SET_USED(flags); - RTE_SET_USED(dir); - return ret; } diff --git a/app/test/test_cryptodev_security_ipsec.h 
b/app/test/test_cryptodev_security_ipsec.h index cbb3ee4..134fc3a 100644 --- a/app/test/test_cryptodev_security_ipsec.h +++ b/app/test/test_cryptodev_security_ipsec.h @@ -49,6 +49,7 @@ struct ipsec_test_data { struct ipsec_test_flags { bool display_alg; + bool icv_corrupt; }; struct crypto_param { diff --git a/doc/guides/rel_notes/release_21_11.rst b/doc/guides/rel_notes/release_21_11.rst index cf0277d..8fc5844 100644 --- a/doc/guides/rel_notes/release_21_11.rst +++ b/doc/guides/rel_notes/release_21_11.rst @@ -90,6 +90,7 @@ New Features * **Added lookaside protocol (IPsec) tests in dpdk-test.
[dpdk-dev] [PATCH v5 4/5] test/crypto: add IV gen cases for IPsec
From: Tejasree Kondoj Added cases to verify IV generated by PMD for lookaside IPsec. The tests compare IV generated for a batch of packets and ensures that IV is not getting repeated in the batch. Signed-off-by: Anoob Joseph Signed-off-by: Tejasree Kondoj Acked-by: Akhil Goyal Acked-by: Ciara Power --- app/test/test_cryptodev.c| 19 app/test/test_cryptodev_security_ipsec.c | 52 app/test/test_cryptodev_security_ipsec.h | 1 + doc/guides/rel_notes/release_21_11.rst | 1 + 4 files changed, 73 insertions(+) diff --git a/app/test/test_cryptodev.c b/app/test/test_cryptodev.c index bfaca1d..956541e 100644 --- a/app/test/test_cryptodev.c +++ b/app/test/test_cryptodev.c @@ -9080,6 +9080,9 @@ test_ipsec_proto_all(const struct ipsec_test_flags *flags) unsigned int i, nb_pkts = 1, pass_cnt = 0; int ret; + if (flags->iv_gen) + nb_pkts = IPSEC_TEST_PACKETS_MAX; + for (i = 0; i < RTE_DIM(aead_list); i++) { test_ipsec_td_prepare(&aead_list[i], NULL, @@ -9130,6 +9133,18 @@ test_ipsec_proto_display_list(const void *data __rte_unused) } static int +test_ipsec_proto_iv_gen(const void *data __rte_unused) +{ + struct ipsec_test_flags flags; + + memset(&flags, 0, sizeof(flags)); + + flags.iv_gen = true; + + return test_ipsec_proto_all(&flags); +} + +static int test_ipsec_proto_err_icv_corrupt(const void *data __rte_unused) { struct ipsec_test_flags flags; @@ -14054,6 +14069,10 @@ static struct unit_test_suite ipsec_proto_testsuite = { ut_setup_security, ut_teardown, test_ipsec_proto_display_list), TEST_CASE_NAMED_ST( + "IV generation", + ut_setup_security, ut_teardown, + test_ipsec_proto_iv_gen), + TEST_CASE_NAMED_ST( "Negative test: ICV corruption", ut_setup_security, ut_teardown, test_ipsec_proto_err_icv_corrupt), diff --git a/app/test/test_cryptodev_security_ipsec.c b/app/test/test_cryptodev_security_ipsec.c index aebbe66..78c7f3a 100644 --- a/app/test/test_cryptodev_security_ipsec.c +++ b/app/test/test_cryptodev_security_ipsec.c @@ -4,12 +4,15 @@ #include #include +#include #include #include #include "test.h" #include "test_cryptodev_security_ipsec.h" +#define IV_LEN_MAX 16 + extern struct ipsec_test_data pkt_aes_256_gcm; int @@ -214,6 +217,46 @@ test_ipsec_tunnel_hdr_len_get(const struct ipsec_test_data *td) } static int +test_ipsec_iv_verify_push(struct rte_mbuf *m, const struct ipsec_test_data *td) +{ + static uint8_t iv_queue[IV_LEN_MAX * IPSEC_TEST_PACKETS_MAX]; + uint8_t *iv_tmp, *output_text = rte_pktmbuf_mtod(m, uint8_t *); + int i, iv_pos, iv_len; + static int index; + + if (td->aead) + iv_len = td->xform.aead.aead.iv.length - td->salt.len; + else + iv_len = td->xform.chain.cipher.cipher.iv.length; + + iv_pos = test_ipsec_tunnel_hdr_len_get(td) + sizeof(struct rte_esp_hdr); + output_text += iv_pos; + + TEST_ASSERT(iv_len <= IV_LEN_MAX, "IV length greater than supported"); + + /* Compare against previous values */ + for (i = 0; i < index; i++) { + iv_tmp = &iv_queue[i * IV_LEN_MAX]; + + if (memcmp(output_text, iv_tmp, iv_len) == 0) { + printf("IV repeated"); + return TEST_FAILED; + } + } + + /* Save IV for future comparisons */ + + iv_tmp = &iv_queue[index * IV_LEN_MAX]; + memcpy(iv_tmp, output_text, iv_len); + index++; + + if (index == IPSEC_TEST_PACKETS_MAX) + index = 0; + + return TEST_SUCCESS; +} + +static int test_ipsec_td_verify(struct rte_mbuf *m, const struct ipsec_test_data *td, bool silent, const struct ipsec_test_flags *flags) { @@ -279,6 +322,15 @@ test_ipsec_post_process(struct rte_mbuf *m, const struct ipsec_test_data *td, struct ipsec_test_data *res_d, bool silent, const struct 
ipsec_test_flags *flags) { + int ret; + + if (flags->iv_gen && + td->ipsec_xform.direction == RTE_SECURITY_IPSEC_SA_DIR_EGRESS) { + ret = test_ipsec_iv_verify_push(m, td); + if (ret != TEST_SUCCESS) + return ret; + } + /* * In case of known vector tests & all inbound tests, res_d provided * would be NULL and output data need to be validated against expected. diff --git a/app/test/test_cryptodev_security_ipsec.h b/app/test/test_cryptodev_security_ipsec.h index 134fc3a..d2ec63f 100644 --- a/app/test/test_cryptodev_security_ipsec.h +++ b/app/test/test_cryptodev_security_ip
[dpdk-dev] [PATCH v5 5/5] test/crypto: add UDP encapsulated IPsec test cases
From: Tejasree Kondoj Add tests to verify UDP encapsulation with IPsec. The tests have IPsec packets generated from plain packets and verifies that UDP header is added. Subsequently, the packets are decapsulated and then resultant packet is verified by comparing against original packet. Signed-off-by: Anoob Joseph Signed-off-by: Tejasree Kondoj Acked-by: Akhil Goyal Acked-by: Ciara Power --- app/test/test_cryptodev.c| 19 +++ app/test/test_cryptodev_security_ipsec.c | 28 app/test/test_cryptodev_security_ipsec.h | 1 + doc/guides/rel_notes/release_21_11.rst | 1 + 4 files changed, 49 insertions(+) diff --git a/app/test/test_cryptodev.c b/app/test/test_cryptodev.c index 956541e..bc2490b 100644 --- a/app/test/test_cryptodev.c +++ b/app/test/test_cryptodev.c @@ -8946,6 +8946,9 @@ test_ipsec_proto_process(const struct ipsec_test_data td[], sec_cap_idx.ipsec.mode = ipsec_xform.mode; sec_cap_idx.ipsec.direction = ipsec_xform.direction; + if (flags->udp_encap) + ipsec_xform.options.udp_encap = 1; + sec_cap = rte_security_capability_get(ctx, &sec_cap_idx); if (sec_cap == NULL) return TEST_SKIPPED; @@ -9157,6 +9160,18 @@ test_ipsec_proto_err_icv_corrupt(const void *data __rte_unused) } static int +test_ipsec_proto_udp_encap(const void *data __rte_unused) +{ + struct ipsec_test_flags flags; + + memset(&flags, 0, sizeof(flags)); + + flags.udp_encap = true; + + return test_ipsec_proto_all(&flags); +} + +static int test_PDCP_PROTO_all(void) { struct crypto_testsuite_params *ts_params = &testsuite_params; @@ -14073,6 +14088,10 @@ static struct unit_test_suite ipsec_proto_testsuite = { ut_setup_security, ut_teardown, test_ipsec_proto_iv_gen), TEST_CASE_NAMED_ST( + "UDP encapsulation", + ut_setup_security, ut_teardown, + test_ipsec_proto_udp_encap), + TEST_CASE_NAMED_ST( "Negative test: ICV corruption", ut_setup_security, ut_teardown, test_ipsec_proto_err_icv_corrupt), diff --git a/app/test/test_cryptodev_security_ipsec.c b/app/test/test_cryptodev_security_ipsec.c index 78c7f3a..5b54996 100644 --- a/app/test/test_cryptodev_security_ipsec.c +++ b/app/test/test_cryptodev_security_ipsec.c @@ -7,6 +7,7 @@ #include #include #include +#include #include "test.h" #include "test_cryptodev_security_ipsec.h" @@ -183,6 +184,9 @@ test_ipsec_td_update(struct ipsec_test_data td_inb[], int icv_pos = td_inb[i].input_text.len - 4; td_inb[i].input_text.data[icv_pos] += 1; } + + if (flags->udp_encap) + td_inb[i].ipsec_xform.options.udp_encap = 1; } } @@ -268,6 +272,30 @@ test_ipsec_td_verify(struct rte_mbuf *m, const struct ipsec_test_data *td, td->ipsec_xform.direction == RTE_SECURITY_IPSEC_SA_DIR_INGRESS) return TEST_SUCCESS; + if (td->ipsec_xform.direction == RTE_SECURITY_IPSEC_SA_DIR_EGRESS && + flags->udp_encap) { + const struct rte_ipv4_hdr *iph4; + const struct rte_ipv6_hdr *iph6; + + if (td->ipsec_xform.tunnel.type == + RTE_SECURITY_IPSEC_TUNNEL_IPV4) { + iph4 = (const struct rte_ipv4_hdr *)output_text; + if (iph4->next_proto_id != IPPROTO_UDP) { + printf("UDP header is not found\n"); + return TEST_FAILED; + } + } else { + iph6 = (const struct rte_ipv6_hdr *)output_text; + if (iph6->proto != IPPROTO_UDP) { + printf("UDP header is not found\n"); + return TEST_FAILED; + } + } + + len -= sizeof(struct rte_udp_hdr); + output_text += sizeof(struct rte_udp_hdr); + } + if (len != td->output_text.len) { printf("Output length (%d) not matching with expected (%d)\n", len, td->output_text.len); diff --git a/app/test/test_cryptodev_security_ipsec.h b/app/test/test_cryptodev_security_ipsec.h index d2ec63f..e1645f4 100644 --- 
a/app/test/test_cryptodev_security_ipsec.h +++ b/app/test/test_cryptodev_security_ipsec.h @@ -51,6 +51,7 @@ struct ipsec_test_flags { bool display_alg; bool icv_corrupt; bool iv_gen; + bool udp_encap; }; struct crypto_param { diff --git a/doc/guides/rel_notes/release_21_11.rst b/doc/guides/rel_notes/release_21_11.rst index cda0a92..30c9ccf 100644 --- a/doc/guides/rel_notes/release_21_11.rst +++ b/doc/gui
Re: [dpdk-dev] [PATCH v1] build/pkg-config: Fix warning for Windows
2021-09-24 18:08 (UTC+), William Tu: > Windows does not support linker option '/-no-whole-archive' > or '/whole-archive'. The patch removes them under Windows build. > > Signed-off-by: William Tu > --- > buildtools/pkg-config/meson.build | 10 +++--- > 1 file changed, 7 insertions(+), 3 deletions(-) Hi William, We need to tell the linker to use whole archive somehow; you write [1] that /WHOLEARCHIVE was required in the end. There's no "Windows linker", there are clang and GCC linkers, the latter supports --whole-archive, but the former needs it translated. Argument translation between compilers is handled by meson. I've filed a bug: https://github.com/mesonbuild/meson/issues/9296 [1]: http://inbox.dpdk.org/dev/CALDO+SZ-FLPYNQbUJ-Ctwtph+M4_X_=zyaokgkp5_un4qfq...@mail.gmail.com/
[dpdk-dev] [PATCH] net/bnxt: remove code to initialize SRAM slice node
From: Kishore Padmanabha Remove the code that explicitly initializes the SRAM slice lists. The code is redundant as the memory allocated is initialized to zero by rte_zmalloc. This fixes the compilation issue for an uninitialized pointer when a debug-optimized configuration is used. Signed-off-by: Kishore Padmanabha Reviewed-by: Farah Smith Reviewed-by: Ajit Khaparde --- drivers/net/bnxt/tf_core/tf_sram_mgr.c | 47 -- 1 file changed, 47 deletions(-) diff --git a/drivers/net/bnxt/tf_core/tf_sram_mgr.c b/drivers/net/bnxt/tf_core/tf_sram_mgr.c index f633a78b25..a248ef2ce8 100644 --- a/drivers/net/bnxt/tf_core/tf_sram_mgr.c +++ b/drivers/net/bnxt/tf_core/tf_sram_mgr.c @@ -407,19 +407,6 @@ tf_sram_is_slice_allocated_in_block(struct tf_sram_block *block, return rc; } -/** - * Initialize slice list - */ -static void -tf_sram_init_slice_list(struct tf_sram_slice_list *slice_list, - enum tf_sram_slice_size slice_size) -{ - slice_list->head = NULL; - slice_list->tail = NULL; - slice_list->cnt = 0; - slice_list->size = slice_size; -} - /** * Get the block count */ @@ -568,11 +555,7 @@ int tf_sram_mgr_bind(void **sram_handle) { int rc = 0; - enum tf_sram_bank_id bank_id; - enum tf_sram_slice_size slice_size; struct tf_sram *sram; - struct tf_sram_slice_list *slice_list; - enum tf_dir dir; struct tfp_calloc_parms cparms; TF_CHECK_PARMS1(sram_handle); @@ -589,37 +572,7 @@ tf_sram_mgr_bind(void **sram_handle) return rc; } sram = (struct tf_sram *)cparms.mem_va; - - /* For each direction -*/ - for (dir = 0; dir < TF_DIR_MAX; dir++) { - /* For each bank -*/ - for (bank_id = TF_SRAM_BANK_ID_0; -bank_id < TF_SRAM_BANK_ID_MAX; -bank_id++) { - /* Create each sized slice empty list -*/ - for (slice_size = TF_SRAM_SLICE_SIZE_8B; -slice_size < TF_SRAM_SLICE_SIZE_MAX; -slice_size++) { - rc = tf_sram_get_slice_list(sram, &slice_list, - slice_size, dir, - bank_id); - if (rc) { - /* Log error */ - TFP_DRV_LOG(ERR, - "No SRAM slice list, rc:%s\n", - strerror(-rc)); - return rc; - } - tf_sram_init_slice_list(slice_list, slice_size); - } - } - } - *sram_handle = sram; - return rc; } -- 2.30.1 (Apple Git-130)
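For reference, a minimal sketch of the allocation guarantee the commit message relies on: a zeroed allocation already leaves list heads, tails and counters at NULL/0, so an explicit init pass is not needed. The structure and names below are illustrative, not the driver's.

    #include <stdint.h>
    #include <rte_malloc.h>

    struct slice_list { void *head, *tail; uint32_t cnt; };

    static struct slice_list *
    alloc_slice_list(void)
    {
        /* rte_zmalloc() returns zero-filled memory: head == tail == NULL, cnt == 0. */
        return rte_zmalloc("slice_list", sizeof(struct slice_list), 0);
    }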
[dpdk-dev] [PATCH] net/virtio: fix vectorized path receive oversized packets
If the packed ring size is not a power of two, it is possible that the remaining descriptor count is smaller than one batch while the batch operation still passes. This causes an incorrect remaining-count calculation and can lead to receiving oversized packets. Fix the issue by checking the remaining count before the batch operation. Fixes: 77d66da83834 ("net/virtio: add vectorized packed ring Rx") Cc: sta...@dpdk.org Signed-off-by: Marvin Liu diff --git a/drivers/net/virtio/virtio_rxtx_packed.c b/drivers/net/virtio/virtio_rxtx_packed.c index ab489a58af..45cf39df22 100644 --- a/drivers/net/virtio/virtio_rxtx_packed.c +++ b/drivers/net/virtio/virtio_rxtx_packed.c @@ -95,11 +95,13 @@ virtio_recv_pkts_packed_vec(void *rx_queue, num = num - ((vq->vq_used_cons_idx + num) % PACKED_BATCH_SIZE); while (num) { - if (!virtqueue_dequeue_batch_packed_vec(rxvq, - &rx_pkts[nb_rx])) { - nb_rx += PACKED_BATCH_SIZE; - num -= PACKED_BATCH_SIZE; - continue; + if (num >= PACKED_BATCH_SIZE) { + if (!virtqueue_dequeue_batch_packed_vec(rxvq, + &rx_pkts[nb_rx])) { + nb_rx += PACKED_BATCH_SIZE; + num -= PACKED_BATCH_SIZE; + continue; + } } if (!virtqueue_dequeue_single_packed_vec(rxvq, &rx_pkts[nb_rx])) { -- 2.17.1
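To make the failure mode concrete with assumed numbers (a packed ring of 250 descriptors and a batch size of 4, neither taken from the patch): with vq_used_cons_idx == 249 and 3 descriptors available, the alignment step computes num = 3 - ((249 + 3) % 4) = 3, which is less than one batch. Before the fix the loop could still attempt the batch dequeue; if it succeeded, num -= PACKED_BATCH_SIZE underflowed the unsigned counter and the loop kept dequeuing, which is how oversized packets ended up being returned. The added num >= PACKED_BATCH_SIZE check routes such a tail through the single-descriptor path instead.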
[dpdk-dev] Re: [RFC PATCH v2 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration
> -----Original Message----- > From: Ananyev, Konstantin > Sent: Saturday, September 25, 2021 2:08 AM > To: Feifei Wang ; Yigit, Ferruh > > Cc: dev@dpdk.org; nd ; sta...@dpdk.org; Ruifeng Wang > > Subject: RE: [RFC PATCH v2 4/5] lib/bpf: use wait event scheme for Rx/Tx > iteration > > > > > > First, fix the bug that keyword const of func arg should be after "*". > > I believe there is no bug here. > > > This is because const before "*" means the value of "cbi" should not > > be changed. > > Exactly, it says that the function itself will not change the value of "cbi". > It just waits for the value to be changed by someone else. > So please keep parameter list intact. Thanks for your explanation. The reason I changed it is that I previously used a rte_wait_until_xx(volatile *addr) API here, and there is a conflict between "const" and "volatile", so the compiler reports a warning. But now, since I keep the parameter as it is, there will be no warning because the new macro has no "volatile". I will drop this unnecessary change. > > > But we should monitor that cbi->use changed and then we can jump out > > of loop. > > > > Second, instead of polling for cbi->use to be updated, use wait event > > scheme. > > > > Fixes: a93ff62a8938 ("bpf: introduce basic Rx/Tx filters") > > Cc: konstantin.anan...@intel.com > > Cc: sta...@dpdk.org > > > > Signed-off-by: Feifei Wang > > Reviewed-by: Ruifeng Wang > > --- > > lib/bpf/bpf_pkt.c | 11 --- > > 1 file changed, 4 insertions(+), 7 deletions(-) > > > > diff --git a/lib/bpf/bpf_pkt.c b/lib/bpf/bpf_pkt.c index > > 6e8248f0d6..08ed8ff68c 100644 > > --- a/lib/bpf/bpf_pkt.c > > +++ b/lib/bpf/bpf_pkt.c > > @@ -111,9 +111,9 @@ bpf_eth_cbi_unuse(struct bpf_eth_cbi *cbi) > > * Waits till datapath finished using given callback. > > */ > > static void > > -bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi) > > +bpf_eth_cbi_wait(struct bpf_eth_cbi *const cbi) > > { > > - uint32_t nuse, puse; > > + uint32_t puse; > > > > /* make sure all previous loads and stores are completed */ > > rte_smp_mb(); > > @@ -122,11 +122,8 @@ bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi) > > > > /* in use, busy wait till current RX/TX iteration is finished */ > > if ((puse & BPF_ETH_CBI_INUSE) != 0) { > > - do { > > - rte_pause(); > > - rte_compiler_barrier(); > > - nuse = cbi->use; > > - } while (nuse == puse); > > + rte_compiler_barrier(); > > + rte_wait_event_32(&cbi->use, UINT_MAX, puse, ==, > __ATOMIC_RELAXED); > > } > > } > > > > -- > > 2.25.1
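For readers not following the RFC, the scheme under discussion replaces the open-coded busy-wait shown in the removed lines. A minimal standalone sketch of such a wait, written with plain GCC atomics and rte_pause() rather than the proposed rte_wait_event_32() macro (whose final form is still under discussion), could look like this:

    #include <stdint.h>
    #include <rte_pause.h>

    /* Spin until *addr no longer holds the previously observed value. */
    static void
    wait_until_changed(volatile uint32_t *addr, uint32_t old)
    {
        while (__atomic_load_n(addr, __ATOMIC_RELAXED) == old)
            rte_pause();    /* CPU hint that this is a spin-wait loop */
    }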
Re: [dpdk-dev] [PATCH v1] build/pkg-config: Fix warning for Windows
On Sat, Sep 25, 2021 at 9:22 AM Dmitry Kozlyuk wrote: > > 2021-09-24 18:08 (UTC+), William Tu: > > Windows does not support linker option '/-no-whole-archive' > > or '/whole-archive'. The patch removes them under Windows build. > > > > Signed-off-by: William Tu > > --- > > buildtools/pkg-config/meson.build | 10 +++--- > > 1 file changed, 7 insertions(+), 3 deletions(-) > > Hi William, > > We need to tell the linker to use whole archive somehow; > you write [1] that /WHOLEARCHIVE was required in the end. > There's no "Windows linker", there are clang and GCC linkers, > the latter supports --whole-archive, but the former needs it translated. > Argument translation between compilers is handled by meson. > I've filed a bug: https://github.com/mesonbuild/meson/issues/9296 > > [1]: > http://inbox.dpdk.org/dev/CALDO+SZ-FLPYNQbUJ-Ctwtph+M4_X_=zyaokgkp5_un4qfq...@mail.gmail.com/ Hi Dmitry, Thank you! One more thing, when I added /WHOLEARCHIVE, I hit some errors due to macro definitions. ex: ws2_32.lib(WS2_32.dll) : error LNK2005: WS2_32_NULL_THUNK_DATA already defined in mincore.lib(WS2_32.dll) Shlwapi.lib(SHLWAPI.dll) : error LNK2005: __NULL_IMPORT_DESCRIPTOR already defined in mincore.lib(api-ms-win-core -com-l1-1-0.dll) Iphlpapi.lib(IPHLPAPI.DLL) : error LNK2005: __IMPORT_DESCRIPTOR_IPHLPAPI already defined in mincore.lib(IPHLPAPI. see: https://github.com/smadaminov/ovs-dpdk-meson-issues/issues/65 In the end, I have to add also: -Wl,/FORCE:MULTIPLE Thanks William
Re: [dpdk-dev] [PATCH v1] ethdev: introduce shared Rx queue
On Wed, 2021-08-11 at 13:04 +0100, Ferruh Yigit wrote: > On 8/11/2021 9:28 AM, Xueming(Steven) Li wrote: > > > > > > > -Original Message- > > > From: Jerin Jacob > > > Sent: Wednesday, August 11, 2021 4:03 PM > > > To: Xueming(Steven) Li > > > Cc: dpdk-dev ; Ferruh Yigit ; > > > NBU-Contact-Thomas Monjalon ; > > > Andrew Rybchenko > > > Subject: Re: [dpdk-dev] [PATCH v1] ethdev: introduce shared Rx queue > > > > > > On Mon, Aug 9, 2021 at 7:46 PM Xueming(Steven) Li > > > wrote: > > > > > > > > Hi, > > > > > > > > > -Original Message- > > > > > From: Jerin Jacob > > > > > Sent: Monday, August 9, 2021 9:51 PM > > > > > To: Xueming(Steven) Li > > > > > Cc: dpdk-dev ; Ferruh Yigit ; > > > > > NBU-Contact-Thomas Monjalon ; Andrew Rybchenko > > > > > > > > > > Subject: Re: [dpdk-dev] [PATCH v1] ethdev: introduce shared Rx queue > > > > > > > > > > On Mon, Aug 9, 2021 at 5:18 PM Xueming Li wrote: > > > > > > > > > > > > In current DPDK framework, each RX queue is pre-loaded with mbufs > > > > > > for incoming packets. When number of representors scale out in a > > > > > > switch domain, the memory consumption became significant. Most > > > > > > important, polling all ports leads to high cache miss, high > > > > > > latency and low throughput. > > > > > > > > > > > > This patch introduces shared RX queue. Ports with same > > > > > > configuration in a switch domain could share RX queue set by > > > > > > specifying sharing group. > > > > > > Polling any queue using same shared RX queue receives packets from > > > > > > all member ports. Source port is identified by mbuf->port. > > > > > > > > > > > > Port queue number in a shared group should be identical. Queue > > > > > > index is > > > > > > 1:1 mapped in shared group. > > > > > > > > > > > > Share RX queue is supposed to be polled on same thread. > > > > > > > > > > > > Multiple groups is supported by group ID. > > > > > > > > > > Is this offload specific to the representor? If so can this name be > > > > > changed specifically to representor? > > > > > > > > Yes, PF and representor in switch domain could take advantage. > > > > > > > > > If it is for a generic case, how the flow ordering will be maintained? > > > > > > > > Not quite sure that I understood your question. The control path of is > > > > almost same as before, PF and representor port still needed, rte flows > > > > not impacted. > > > > Queues still needed for each member port, descriptors(mbuf) will be > > > > supplied from shared Rx queue in my PMD implementation. > > > > > > My question was if create a generic RTE_ETH_RX_OFFLOAD_SHARED_RXQ > > > offload, multiple ethdev receive queues land into the same > > > receive queue, In that case, how the flow order is maintained for > > > respective receive queues. > > > > I guess the question is testpmd forward stream? The forwarding logic has to > > be changed slightly in case of shared rxq. > > basically for each packet in rx_burst result, lookup source stream > > according to mbuf->port, forwarding to target fs. > > Packets from same source port could be grouped as a small burst to process, > > this will accelerates the performance if traffic come from > > limited ports. I'll introduce some common api to do shard rxq forwarding, > > call it with packets handling callback, so it suites for > > all forwarding engine. Will sent patches soon. > > > > All ports will put the packets in to the same queue (share queue), right? 
Does > this mean only a single core will poll, what will happen if there are > multiple cores polling, won't it cause a problem? > > And if this requires specific changes in the application, I am not sure about > the solution, can't this work in a transparent way to the application? Discussed with Jerin, a new API introduced in v3 2/8 aggregates ports in the same group into one new port. Users could schedule polling on the aggregated port instead of all member ports. > > Overall, is this for optimizing memory for the port representors? If so can't > we > have a port representor specific solution, reducing scope can reduce the > complexity it brings? > > > > If this offload is only useful for the representor case, can we make this > > > offload specific to the representor case by changing its name and > > > scope. > > It works for both PF and representors in the same switch domain; for > applications like OVS, few changes apply. > > > > > > > > > > > > > > > > > > > > > > Signed-off-by: Xueming Li > > > > > > --- > > > > > > doc/guides/nics/features.rst| 11 +++ > > > > > > doc/guides/nics/features/default.ini| 1 + > > > > > > doc/guides/prog_guide/switch_representation.rst | 10 ++ > > > > > > lib/ethdev/rte_ethdev.c | 1 + > > > > > > lib/ethdev/rte_ethdev.h | 7 +++ > > > > > > 5 files changed, 30 insertions(+) > > > > > > > > > > > > diff --gi
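A rough sketch of the forwarding change described above, with hypothetical helper names (lookup_fwd_stream() and fwd_sub_burst() are placeholders, not the testpmd API): poll one shared queue, then split the burst by mbuf->port and hand each sub-burst to the forwarding stream of its source port.

#include <rte_ethdev.h>
#include <rte_mbuf.h>

#define BURST_SZ 32

struct fwd_stream;                                    /* hypothetical */
struct fwd_stream *lookup_fwd_stream(uint16_t port);  /* hypothetical */
void fwd_sub_burst(struct fwd_stream *fs,
		struct rte_mbuf **pkts, uint16_t n);  /* hypothetical */

static void
shared_rxq_fwd(uint16_t poll_port, uint16_t queue_id)
{
	struct rte_mbuf *pkts[BURST_SZ];
	uint16_t nb_rx, i, j;

	/* one poll returns packets from all member ports of the group */
	nb_rx = rte_eth_rx_burst(poll_port, queue_id, pkts, BURST_SZ);

	i = 0;
	while (i < nb_rx) {
		uint16_t src = pkts[i]->port;

		/* group consecutive packets from the same source port */
		for (j = i + 1; j < nb_rx && pkts[j]->port == src; j++)
			;
		fwd_sub_burst(lookup_fwd_stream(src), &pkts[i], j - i);
		i = j;
	}
}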
Re: [dpdk-dev] [PATCH v7 0/3] net/i40e: remove i40evf
> -Original Message- > From: Zhang, RobinX > Sent: Friday, September 24, 2021 2:22 PM > To: dev@dpdk.org > Cc: Wang, Haiyue ; Xing, Beilei > ; m...@ashroe.eu; Wu, Jingjing > ; Burakov, Anatoly ; > Zhang, Qi Z ; Guo, Junfeng ; > Yang, SteveX ; Zhang, RobinX > > Subject: [PATCH v7 0/3] net/i40e: remove i40evf > > In DPDK 21.05, iavf already became the default VF for i40e devices. > So remove i40evf, as there is no longer a need to maintain it. > > v7: > - rebase code. > > v6: > - remove i40e_vf struct and related code, remove doc i40e_vf.ini. > > v5: > - rebase code. > > v4: > - resolve compile warning issue. > > v3: > - remove VF related code in i40e_rxtx.c. > > v2: > - update 21.11 release note, remove some missed documentation. > > Robin Zhang (3): > net/i40e: remove i40evf > net/iavf: remove i40evf devargs option > doc: remove i40evf related documentation > > doc/guides/howto/lm_bond_virtio_sriov.rst |4 +- > doc/guides/nics/features/i40e_vf.ini | 40 - > doc/guides/nics/intel_vf.rst |8 +- > doc/guides/rel_notes/deprecation.rst |8 - > doc/guides/rel_notes/release_21_11.rst|2 + > drivers/net/i40e/base/i40e_osdep.h|1 - > drivers/net/i40e/i40e_ethdev.h| 78 +- > drivers/net/i40e/i40e_ethdev_vf.c | 3006 - > drivers/net/i40e/i40e_rxtx.c | 50 +- > drivers/net/i40e/meson.build |1 - > drivers/net/i40e/rte_pmd_i40e.c |9 +- > drivers/net/iavf/iavf_ethdev.c| 52 +- > 12 files changed, 31 insertions(+), 3228 deletions(-) delete mode 100644 > doc/guides/nics/features/i40e_vf.ini > delete mode 100644 drivers/net/i40e/i40e_ethdev_vf.c > > -- > 2.25.1 Acked-by: Qi Zhang Applied to dpdk-next-net-intel. Thanks Qi
[dpdk-dev] [RFC PATCH v3 0/5] add new definitions for wait scheme
Add new definitions for wait scheme, and apply these new definitions in lib to replace rte_pause. v2: 1. use macro to create new wait scheme (Stephen) v3: 1. delete unnecessary bug fix in bpf (Konstantin) Feifei Wang (5): eal: add new definitions for wait scheme eal: use wait event for read pflock eal: use wait event scheme for mcslock lib/bpf: use wait event scheme for Rx/Tx iteration lib/distributor: use wait event scheme lib/bpf/bpf_pkt.c| 9 +- lib/distributor/rte_distributor_single.c | 10 +- lib/eal/arm/include/rte_pause_64.h | 151 +++ lib/eal/include/generic/rte_mcslock.h| 12 +- lib/eal/include/generic/rte_pause.h | 78 lib/eal/include/generic/rte_pflock.h | 4 +- 6 files changed, 191 insertions(+), 73 deletions(-) -- 2.25.1
[dpdk-dev] [RFC PATCH v3 1/5] eal: add new definitions for wait scheme
Introduce macros as generic interface for address monitoring. Signed-off-by: Feifei Wang Reviewed-by: Ruifeng Wang --- lib/eal/arm/include/rte_pause_64.h | 151 ++-- lib/eal/include/generic/rte_pause.h | 78 ++ 2 files changed, 175 insertions(+), 54 deletions(-) diff --git a/lib/eal/arm/include/rte_pause_64.h b/lib/eal/arm/include/rte_pause_64.h index e87d10b8cc..205510e044 100644 --- a/lib/eal/arm/include/rte_pause_64.h +++ b/lib/eal/arm/include/rte_pause_64.h @@ -31,20 +31,12 @@ static inline void rte_pause(void) /* Put processor into low power WFE(Wait For Event) state. */ #define __WFE() { asm volatile("wfe" : : : "memory"); } -static __rte_always_inline void -rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected, - int memorder) -{ - uint16_t value; - - assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED); - - /* -* Atomic exclusive load from addr, it returns the 16-bit content of -* *addr while making it 'monitored',when it is written by someone -* else, the 'monitored' state is cleared and a event is generated -* implicitly to exit WFE. -*/ +/* + * Atomic exclusive load from addr, it returns the 16-bit content of + * *addr while making it 'monitored', when it is written by someone + * else, the 'monitored' state is cleared and a event is generated + * implicitly to exit WFE. + */ #define __LOAD_EXC_16(src, dst, memorder) { \ if (memorder == __ATOMIC_RELAXED) { \ asm volatile("ldxrh %w[tmp], [%x[addr]]" \ @@ -58,6 +50,52 @@ rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected, : "memory"); \ } } +/* + * Atomic exclusive load from addr, it returns the 32-bit content of + * *addr while making it 'monitored', when it is written by someone + * else, the 'monitored' state is cleared and a event is generated + * implicitly to exit WFE. + */ +#define __LOAD_EXC_32(src, dst, memorder) { \ + if (memorder == __ATOMIC_RELAXED) { \ + asm volatile("ldxr %w[tmp], [%x[addr]]" \ + : [tmp] "=&r" (dst) \ + : [addr] "r"(src)\ + : "memory"); \ + } else { \ + asm volatile("ldaxr %w[tmp], [%x[addr]]" \ + : [tmp] "=&r" (dst) \ + : [addr] "r"(src)\ + : "memory"); \ + } } + +/* + * Atomic exclusive load from addr, it returns the 64-bit content of + * *addr while making it 'monitored', when it is written by someone + * else, the 'monitored' state is cleared and a event is generated + * implicitly to exit WFE. 
+ */ +#define __LOAD_EXC_64(src, dst, memorder) { \ + if (memorder == __ATOMIC_RELAXED) { \ + asm volatile("ldxr %x[tmp], [%x[addr]]" \ + : [tmp] "=&r" (dst) \ + : [addr] "r"(src)\ + : "memory"); \ + } else { \ + asm volatile("ldaxr %x[tmp], [%x[addr]]" \ + : [tmp] "=&r" (dst) \ + : [addr] "r"(src)\ + : "memory"); \ + } } + +static __rte_always_inline void +rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected, + int memorder) +{ + uint16_t value; + + assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED); + __LOAD_EXC_16(addr, value, memorder) if (value != expected) { __SEVL() @@ -66,7 +104,6 @@ rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected, __LOAD_EXC_16(addr, value, memorder) } while (value != expected); } -#undef __LOAD_EXC_16 } static __rte_always_inline void @@ -77,25 +114,6 @@ rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected, assert(memorder == __ATOMIC_ACQUIRE || memorder == __ATOMIC_RELAXED); - /* -* Atomic exclusive load from addr, it returns the 32-bit content of -* *addr while making it 'monitored',when it is written by someone -* else, the 'monitored' state is cleared and a event is generated -* implicitly to exit WFE. -*/ -#define __LOAD_EXC_32(src, dst, memorder) { \ - if (memorder == __ATOMIC_RELAXED) { \ - asm volatile("ldxr %w[tmp], [%x[addr]]" \ - : [tmp] "=&r" (dst) \ - : [addr] "r"(src)\ -
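The generic (non-Arm) definition added by this patch is not visible in this excerpt. Judging only from the call sites in patches 2/5-5/5, the 32-bit variant plausibly reduces to something like the sketch below (busy-wait while the masked load still satisfies the condition); the actual macro in the patch may differ in detail.

#include <rte_pause.h>

/* Sketch only: spin while ((*addr & mask) cond expected) holds. */
#define rte_wait_event_32(addr, mask, expected, cond, memorder)	\
do {									\
	while ((__atomic_load_n((addr), (memorder)) & (mask))		\
			cond (expected))				\
		rte_pause();						\
} while (0)

Read this way, rte_wait_event_16(&pf->rd.in, RTE_PFLOCK_WBITS, w, ==, __ATOMIC_ACQUIRE) in patch 2/5 spins while (pf->rd.in & RTE_PFLOCK_WBITS) == w, which matches the loop it replaces.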
[dpdk-dev] [RFC PATCH v3 2/5] eal: use wait event for read pflock
Instead of polling for read pflock update, use wait event scheme for this case. Signed-off-by: Feifei Wang Reviewed-by: Ruifeng Wang --- lib/eal/include/generic/rte_pflock.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/eal/include/generic/rte_pflock.h b/lib/eal/include/generic/rte_pflock.h index e57c179ef2..9865f1349c 100644 --- a/lib/eal/include/generic/rte_pflock.h +++ b/lib/eal/include/generic/rte_pflock.h @@ -121,9 +121,7 @@ rte_pflock_read_lock(rte_pflock_t *pf) return; /* Wait for current write phase to complete. */ - while ((__atomic_load_n(&pf->rd.in, __ATOMIC_ACQUIRE) - & RTE_PFLOCK_WBITS) == w) - rte_pause(); + rte_wait_event_16(&pf->rd.in, RTE_PFLOCK_WBITS, w, ==, __ATOMIC_ACQUIRE); } /** -- 2.25.1
[dpdk-dev] [RFC PATCH v3 3/5] eal: use wait event scheme for mcslock
Instead of polling for mcslock to be updated, use wait event scheme for this case. Signed-off-by: Feifei Wang Reviewed-by: Ruifeng Wang --- lib/eal/include/generic/rte_mcslock.h | 12 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/lib/eal/include/generic/rte_mcslock.h b/lib/eal/include/generic/rte_mcslock.h index 9f323bd2a2..c8d1c4f38f 100644 --- a/lib/eal/include/generic/rte_mcslock.h +++ b/lib/eal/include/generic/rte_mcslock.h @@ -84,8 +84,7 @@ rte_mcslock_lock(rte_mcslock_t **msl, rte_mcslock_t *me) * to spin on me->locked until the previous lock holder resets * the me->locked using mcslock_unlock(). */ - while (__atomic_load_n(&me->locked, __ATOMIC_ACQUIRE)) - rte_pause(); + rte_wait_event_32(&me->locked, INT_MAX, 0, !=, __ATOMIC_ACQUIRE); } /** @@ -117,8 +116,13 @@ rte_mcslock_unlock(rte_mcslock_t **msl, rte_mcslock_t *me) /* More nodes added to the queue by other CPUs. * Wait until the next pointer is set. */ - while (__atomic_load_n(&me->next, __ATOMIC_RELAXED) == NULL) - rte_pause(); +#ifdef RTE_ARCH_32 + rte_wait_event_32((uint32_t *)&me->next, UINT_MAX, 0, ==, + __ATOMIC_RELAXED); +#else + rte_wait_event_64((uint64_t *)&me->next, ULONG_MAX, 0, ==, + __ATOMIC_RELAXED); +#endif } /* Pass lock to next waiter. */ -- 2.25.1
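The RTE_ARCH_32/RTE_ARCH_64 branches above exist because the wait macros take integer addresses of a fixed width, while me->next is a pointer; the pointer slot is therefore read as a 32- or 64-bit integer and compared against 0 (NULL). A standalone illustration of the same idea (illustrative name, not part of the patch):

#include <stdint.h>

/* Spin until a pointer-sized slot becomes non-zero (i.e. non-NULL). */
static inline void
wait_ptr_set(void *volatile *slot)
{
	while ((uintptr_t)__atomic_load_n(slot, __ATOMIC_RELAXED) == 0)
		; /* rte_pause()/WFE would go here */
}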
[dpdk-dev] [RFC PATCH v3 4/5] lib/bpf: use wait event scheme for Rx/Tx iteration
Instead of polling for cbi->use to be updated, use wait event scheme. Signed-off-by: Feifei Wang Reviewed-by: Ruifeng Wang --- lib/bpf/bpf_pkt.c | 9 +++-- 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/lib/bpf/bpf_pkt.c b/lib/bpf/bpf_pkt.c index 6e8248f0d6..3af15ae97b 100644 --- a/lib/bpf/bpf_pkt.c +++ b/lib/bpf/bpf_pkt.c @@ -113,7 +113,7 @@ bpf_eth_cbi_unuse(struct bpf_eth_cbi *cbi) static void bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi) { - uint32_t nuse, puse; + uint32_t puse; /* make sure all previous loads and stores are completed */ rte_smp_mb(); @@ -122,11 +122,8 @@ bpf_eth_cbi_wait(const struct bpf_eth_cbi *cbi) /* in use, busy wait till current RX/TX iteration is finished */ if ((puse & BPF_ETH_CBI_INUSE) != 0) { - do { - rte_pause(); - rte_compiler_barrier(); - nuse = cbi->use; - } while (nuse == puse); + rte_compiler_barrier(); + rte_wait_event_32(&cbi->use, UINT_MAX, puse, ==, __ATOMIC_RELAXED); } } -- 2.25.1
[dpdk-dev] [RFC PATCH v3 5/5] lib/distributor: use wait event scheme
Instead of polling for bufptr64 to be updated, use wait event for this case. Signed-off-by: Feifei Wang Reviewed-by: Ruifeng Wang --- lib/distributor/rte_distributor_single.c | 10 -- 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/lib/distributor/rte_distributor_single.c b/lib/distributor/rte_distributor_single.c index f4725b1d0b..86cab349f4 100644 --- a/lib/distributor/rte_distributor_single.c +++ b/lib/distributor/rte_distributor_single.c @@ -33,9 +33,8 @@ rte_distributor_request_pkt_single(struct rte_distributor_single *d, union rte_distributor_buffer_single *buf = &d->bufs[worker_id]; int64_t req = (((int64_t)(uintptr_t)oldpkt) << RTE_DISTRIB_FLAG_BITS) | RTE_DISTRIB_GET_BUF; - while (unlikely(__atomic_load_n(&buf->bufptr64, __ATOMIC_RELAXED) - & RTE_DISTRIB_FLAGS_MASK)) - rte_pause(); + rte_wait_event_64(&buf->bufptr64, RTE_DISTRIB_FLAGS_MASK, + 0, !=, __ATOMIC_RELAXED); /* Sync with distributor on GET_BUF flag. */ __atomic_store_n(&(buf->bufptr64), req, __ATOMIC_RELEASE); @@ -74,9 +73,8 @@ rte_distributor_return_pkt_single(struct rte_distributor_single *d, union rte_distributor_buffer_single *buf = &d->bufs[worker_id]; uint64_t req = (((int64_t)(uintptr_t)oldpkt) << RTE_DISTRIB_FLAG_BITS) | RTE_DISTRIB_RETURN_BUF; - while (unlikely(__atomic_load_n(&buf->bufptr64, __ATOMIC_RELAXED) - & RTE_DISTRIB_FLAGS_MASK)) - rte_pause(); + rte_wait_event_64(&buf->bufptr64, RTE_DISTRIB_FLAGS_MASK, + 0, !=, __ATOMIC_RELAXED); /* Sync with distributor on RETURN_BUF flag. */ __atomic_store_n(&(buf->bufptr64), req, __ATOMIC_RELEASE); -- 2.25.1
[dpdk-dev] [PATCH v2] net/ice: enable Rx timestamp on Flex Descriptor
Use the dynamic mbuf to register timestamp field and flag. The ice has the feature to dump Rx timestamp value into dynamic mbuf field by flex descriptor. This feature is turned on by dev config "enable-rx-timestamp". Currently, it's only supported under scalar path. Signed-off-by: Simei Su --- v2: * Refine release notes. * Merge two helper functions into one. * Remove one field in ice_rx_queue structure. doc/guides/rel_notes/release_21_11.rst | 3 +- drivers/net/ice/ice_ethdev.c | 6 ++- drivers/net/ice/ice_rxtx.c | 88 ++ drivers/net/ice/ice_rxtx.h | 4 ++ drivers/net/ice/ice_rxtx_vec_common.h | 3 ++ 5 files changed, 102 insertions(+), 2 deletions(-) diff --git a/doc/guides/rel_notes/release_21_11.rst b/doc/guides/rel_notes/release_21_11.rst index 19356ac..0bbe82c 100644 --- a/doc/guides/rel_notes/release_21_11.rst +++ b/doc/guides/rel_notes/release_21_11.rst @@ -75,7 +75,8 @@ New Features * **Updated Intel ice driver.** - Added 1PPS out support by a devargs. + * Added 1PPS out support by a devargs. + * Added DEV_RX_OFFLOAD_TIMESTAMP support. * **Updated Marvell cnxk ethdev driver.** diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c index e24a3b6..534af03 100644 --- a/drivers/net/ice/ice_ethdev.c +++ b/drivers/net/ice/ice_ethdev.c @@ -31,6 +31,9 @@ #define ICE_HW_DEBUG_MASK_ARG "hw_debug_mask" #define ICE_ONE_PPS_OUT_ARG "pps_out" +uint64_t ice_timestamp_dynflag; +int ice_timestamp_dynfield_offset = -1; + static const char * const ice_valid_args[] = { ICE_SAFE_MODE_SUPPORT_ARG, ICE_PIPELINE_MODE_SUPPORT_ARG, @@ -3652,7 +3655,8 @@ ice_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) DEV_RX_OFFLOAD_QINQ_STRIP | DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM | DEV_RX_OFFLOAD_VLAN_EXTEND | - DEV_RX_OFFLOAD_RSS_HASH; + DEV_RX_OFFLOAD_RSS_HASH | + DEV_RX_OFFLOAD_TIMESTAMP; dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_QINQ_INSERT | DEV_TX_OFFLOAD_IPV4_CKSUM | diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c index 5d7ab4f..a932fcc 100644 --- a/drivers/net/ice/ice_rxtx.c +++ b/drivers/net/ice/ice_rxtx.c @@ -302,6 +302,18 @@ ice_program_hw_rx_queue(struct ice_rx_queue *rxq) } } + if (rxq->offloads & DEV_RX_OFFLOAD_TIMESTAMP) { + /* Register mbuf field and flag for Rx timestamp */ + err = rte_mbuf_dyn_rx_timestamp_register( + &ice_timestamp_dynfield_offset, + &ice_timestamp_dynflag); + if (err != 0) { + PMD_INIT_LOG(ERR, + "Cannot register mbuf field/flag for timestamp"); + return -EINVAL; + } + } + memset(&rx_ctx, 0, sizeof(rx_ctx)); rx_ctx.base = rxq->rx_ring_dma / ICE_QUEUE_BASE_ADDR_UNIT; @@ -354,6 +366,9 @@ ice_program_hw_rx_queue(struct ice_rx_queue *rxq) regval |= (0x03 << QRXFLXP_CNTXT_RXDID_PRIO_S) & QRXFLXP_CNTXT_RXDID_PRIO_M; + if (rxq->offloads & DEV_RX_OFFLOAD_TIMESTAMP) + regval |= QRXFLXP_CNTXT_TS_M; + ICE_WRITE_REG(hw, QRXFLXP_CNTXT(rxq->reg_idx), regval); err = ice_clear_rxq_ctx(hw, rxq->reg_idx); @@ -1530,6 +1545,35 @@ ice_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union ice_rx_flex_desc *rxdp) mb->vlan_tci, mb->vlan_tci_outer); } +uint64_t +ice_tstamp_convert_32b_64b(struct ice_hw *hw, uint32_t in_timestamp) +{ + const uint64_t mask = 0x; + uint32_t hi, lo, lo2, delta; + uint64_t time, ns; + + lo = ICE_READ_REG(hw, GLTSYN_TIME_L(0)); + hi = ICE_READ_REG(hw, GLTSYN_TIME_H(0)); + lo2 = ICE_READ_REG(hw, GLTSYN_TIME_L(0)); + + if (lo2 < lo) { + lo = ICE_READ_REG(hw, GLTSYN_TIME_L(0)); + hi = ICE_READ_REG(hw, GLTSYN_TIME_H(0)); + } + + time = ((uint64_t)hi << 32) | lo; + + delta = (in_timestamp - (uint32_t)(time & mask)); + if (delta > 
(mask / 2)) { + delta = ((uint32_t)(time & mask) - in_timestamp); + ns = time - delta; + } else { + ns = time + delta; + } + + return ns; +} + #define ICE_LOOK_AHEAD 8 #if (ICE_LOOK_AHEAD != 8) #error "PMD ICE: ICE_LOOK_AHEAD must be 8\n" @@ -1546,6 +1590,9 @@ ice_rx_scan_hw_ring(struct ice_rx_queue *rxq) int32_t i, j, nb_rx = 0; uint64_t pkt_flags = 0; uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl; + struct ice_vsi *vsi = rxq->vsi; + struct ice_hw *hw = ICE_VSI_TO_HW(vsi); + uint64_t ts_ns; rxdp = &rxq->rx_ring[rxq->rx_tail]; rxep = &rxq->sw_rin
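On the application side, consuming this offload would look roughly like the hedged sketch below (ts_offset, ts_flag and the function names are illustrative): request DEV_RX_OFFLOAD_TIMESTAMP before configuring the port, register the same dynamic field and flag the PMD registers above, then read the field from mbufs that carry the flag.

#include <stdio.h>
#include <inttypes.h>
#include <rte_ethdev.h>
#include <rte_mbuf_dyn.h>

static int ts_offset = -1;
static uint64_t ts_flag;

static int
setup_rx_timestamp(struct rte_eth_conf *conf)
{
	/* request the offload before rte_eth_dev_configure() */
	conf->rxmode.offloads |= DEV_RX_OFFLOAD_TIMESTAMP;

	/* same registration helper the PMD calls in ice_program_hw_rx_queue() */
	return rte_mbuf_dyn_rx_timestamp_register(&ts_offset, &ts_flag);
}

static void
print_rx_timestamp(const struct rte_mbuf *m)
{
	if (m->ol_flags & ts_flag) {
		uint64_t ts = *RTE_MBUF_DYNFIELD(m, ts_offset,
				const rte_mbuf_timestamp_t *);
		printf("rx timestamp: %" PRIu64 "\n", ts);
	}
}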