[dpdk-dev] [PATCH] ixgbe_rx_scan_hw_ring: Fix initializing id and hash fields in flow director mode.
When Flow Director was used together with bulk alloc, id and hash was swapped when packet matches flow director filter due to improper fdir field initialization. Signed-off-by: Pawel Wodkowski --- lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 24 +--- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c index aaf46d4..796e5a4 100644 --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c @@ -902,6 +902,7 @@ ixgbe_rx_scan_hw_ring(struct igb_rx_queue *rxq) struct igb_rx_entry *rxep; struct rte_mbuf *mb; uint16_t pkt_len; + uint16_t pkt_flags; int s[LOOK_AHEAD], nb_dd; int i, j, nb_rx = 0; @@ -935,21 +936,30 @@ ixgbe_rx_scan_hw_ring(struct igb_rx_queue *rxq) /* Translate descriptor info to mbuf format */ for (j = 0; j < nb_dd; ++j) { mb = rxep[j].mbuf; - pkt_len = (uint16_t)(rxdp[j].wb.upper.length - - rxq->crc_len); + pkt_len = (uint16_t)(rxdp[j].wb.upper.length - rxq->crc_len); mb->pkt.data_len = pkt_len; mb->pkt.pkt_len = pkt_len; - mb->pkt.vlan_macip.f.vlan_tci = rxdp[j].wb.upper.vlan; - mb->pkt.hash.rss = rxdp[j].wb.lower.hi_dword.rss; + mb->pkt.vlan_macip.f.vlan_tci = + rte_le_to_cpu_16(rxdp[j].wb.upper.vlan); /* convert descriptor fields to rte mbuf flags */ - mb->ol_flags = rx_desc_hlen_type_rss_to_pkt_flags( + pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags( rxdp[j].wb.lower.lo_dword.data); /* reuse status field from scan list */ - mb->ol_flags = (uint16_t)(mb->ol_flags | + pkt_flags = (uint16_t)(pkt_flags | rx_desc_status_to_pkt_flags(s[j])); - mb->ol_flags = (uint16_t)(mb->ol_flags | + pkt_flags = (uint16_t)(pkt_flags | rx_desc_error_to_pkt_flags(s[j])); + mb->ol_flags = pkt_flags; + + if (likely(pkt_flags & PKT_RX_RSS_HASH)) + mb->pkt.hash.rss = rxdp[j].wb.lower.hi_dword.rss; + else if (pkt_flags & PKT_RX_FDIR) { + mb->pkt.hash.fdir.hash = + (uint16_t)((rxdp[j].wb.lower.hi_dword.csum_ip.csum) + & IXGBE_ATR_HASH_MASK); + mb->pkt.hash.fdir.id = 
rxdp[j].wb.lower.hi_dword.csum_ip.ip_id; + } } /* Move mbuf pointers from the S/W ring to the stage */ -- 1.7.9.5
[dpdk-dev] [PATCH] compiling kni module on Ubuntu 12.04 failed
On Ubuntu 12.04.4 the file '/proc/version_signature' contains 'Ubuntu 3.11.0-15.25~precise1-generic 3.11.10'. This introduces a compilation error since '~precise1' will not be discarded. This patch discards everything from '~' onward (inclusive). Signed-off-by: Pawel Wodkowski --- lib/librte_eal/linuxapp/kni/Makefile |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/librte_eal/linuxapp/kni/Makefile b/lib/librte_eal/linuxapp/kni/Makefile index 2799191..e2902af 100644 --- a/lib/librte_eal/linuxapp/kni/Makefile +++ b/lib/librte_eal/linuxapp/kni/Makefile @@ -47,7 +47,7 @@ MODULE_CFLAGS += -Wall -Werror ifeq ($(shell lsb_release -si 2>/dev/null),Ubuntu) MODULE_CFLAGS += -DUBUNTU_RELEASE_CODE=$(shell lsb_release -sr | tr -d .) UBUNTU_KERNEL_CODE := $(shell cut -d' ' -f2 /proc/version_signature | \ -cut -d- -f1,2 | tr .- $(comma)) +cut -d'~' -f1 | cut -d- -f1,2 | tr .- $(comma)) MODULE_CFLAGS += -D"UBUNTU_KERNEL_CODE=UBUNTU_KERNEL_VERSION($(UBUNTU_KERNEL_CODE))" endif -- 1.7.9.5
[dpdk-dev] [PATCH] igb_uio: Remove code that blocks support of some integrated NICs
This patch removes obsolete code that prevents adding integrated copper NICs 82575EB and I350 to list of supported NICs. --- app/test/test_pci.c |1 - lib/librte_eal/common/include/rte_pci_dev_ids.h |8 2 files changed, 9 deletions(-) diff --git a/app/test/test_pci.c b/app/test/test_pci.c index 6908d04..7067dfb 100644 --- a/app/test/test_pci.c +++ b/app/test/test_pci.c @@ -81,7 +81,6 @@ struct rte_pci_id my_driver_id2[] = { /* IGB & EM NICS */ #define RTE_PCI_DEV_ID_DECL_EM(vend, dev) {RTE_PCI_DEVICE(vend, dev)}, #define RTE_PCI_DEV_ID_DECL_IGB(vend, dev) {RTE_PCI_DEVICE(vend, dev)}, -#define RTE_PCI_DEV_USE_82575EB_COPPER #include { .vendor_id = 0, /* sentinel */ }, diff --git a/lib/librte_eal/common/include/rte_pci_dev_ids.h b/lib/librte_eal/common/include/rte_pci_dev_ids.h index a51c1ef..3e27025 100644 --- a/lib/librte_eal/common/include/rte_pci_dev_ids.h +++ b/lib/librte_eal/common/include/rte_pci_dev_ids.h @@ -323,11 +323,7 @@ RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_NS) RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_NS_SERDES) RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_SERDES_QUAD) -/* This device is the on-board NIC on some development boards. */ -#ifdef RTE_PCI_DEV_USE_82575EB_COPPER RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82575EB_COPPER) -#endif - RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES) RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER) @@ -338,11 +334,7 @@ RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_SGMII) RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_COPPER_DUAL) RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_QUAD_FIBER) -/* This device is the on-board NIC on some development boards. 
*/ -#ifndef RTE_PCI_DEV_NO_USE_I350_COPPER RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_COPPER) -#endif - RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_FIBER) RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_SERDES) RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_SGMII) -- 1.7.9.5
[dpdk-dev] [PATCH v2 0/2] cmdline: add polling mode for command line
This patchset adds the ability to process console input in the same thread as packet processing by using the poll() function and fixes some minor issues. v2 changes: - add doxygen documentation for cmdline_poll() - map file issue fixed - use proper email address. - add additional missing include in cmdline_parse_ipaddr.h Pawel Wodkowski (2): cmdline: fix missing include files cmdline: add polling mode for command line doc/api/doxy-api.conf | 1 + lib/librte_cmdline/cmdline.c | 35 ++ lib/librte_cmdline/cmdline.h | 24 lib/librte_cmdline/cmdline_parse_ipaddr.h | 2 ++ lib/librte_cmdline/cmdline_rdline.h| 1 + lib/librte_cmdline/cmdline_vt100.h | 2 ++ lib/librte_cmdline/rte_cmdline_version.map | 8 +++ 7 files changed, 73 insertions(+) -- 1.9.1
[dpdk-dev] [PATCH v2 1/2] cmdline: fix missing include files
When including only some of the library headers, some definitions are missing and the build fails. Signed-off-by: Pawel Wodkowski --- lib/librte_cmdline/cmdline.h | 3 +++ lib/librte_cmdline/cmdline_parse_ipaddr.h | 2 ++ lib/librte_cmdline/cmdline_rdline.h | 1 + lib/librte_cmdline/cmdline_vt100.h| 2 ++ 4 files changed, 8 insertions(+) diff --git a/lib/librte_cmdline/cmdline.h b/lib/librte_cmdline/cmdline.h index 06ae086..9085ff6 100644 --- a/lib/librte_cmdline/cmdline.h +++ b/lib/librte_cmdline/cmdline.h @@ -61,6 +61,9 @@ #ifndef _CMDLINE_H_ #define _CMDLINE_H_ +#include +#include + #ifdef __cplusplus extern "C" { #endif diff --git a/lib/librte_cmdline/cmdline_parse_ipaddr.h b/lib/librte_cmdline/cmdline_parse_ipaddr.h index 296c374..46c6e1b 100644 --- a/lib/librte_cmdline/cmdline_parse_ipaddr.h +++ b/lib/librte_cmdline/cmdline_parse_ipaddr.h @@ -61,6 +61,8 @@ #ifndef _PARSE_IPADDR_H_ #define _PARSE_IPADDR_H_ +#include + #ifdef __cplusplus extern "C" { #endif diff --git a/lib/librte_cmdline/cmdline_rdline.h b/lib/librte_cmdline/cmdline_rdline.h index ae6e24e..b9aad9b 100644 --- a/lib/librte_cmdline/cmdline_rdline.h +++ b/lib/librte_cmdline/cmdline_rdline.h @@ -84,6 +84,7 @@ * instance. */ +#include #include #include diff --git a/lib/librte_cmdline/cmdline_vt100.h b/lib/librte_cmdline/cmdline_vt100.h index b9840f6..963add8 100644 --- a/lib/librte_cmdline/cmdline_vt100.h +++ b/lib/librte_cmdline/cmdline_vt100.h @@ -61,6 +61,8 @@ #ifndef _CMDLINE_VT100_H_ #define _CMDLINE_VT100_H_ +#include + #ifdef __cplusplus extern "C" { #endif -- 1.9.1
[dpdk-dev] [PATCH v2 2/2] cmdline: add polling mode for command line
This patch adds the ability to process console input in the same thread as packet processing by using poll() function. Signed-off-by: Pawel Wodkowski --- doc/api/doxy-api.conf | 1 + lib/librte_cmdline/cmdline.c | 35 ++ lib/librte_cmdline/cmdline.h | 21 ++ lib/librte_cmdline/rte_cmdline_version.map | 8 +++ 4 files changed, 65 insertions(+) diff --git a/doc/api/doxy-api.conf b/doc/api/doxy-api.conf index 50b0105..51b11c7 100644 --- a/doc/api/doxy-api.conf +++ b/doc/api/doxy-api.conf @@ -33,6 +33,7 @@ INPUT = doc/api/doxy-api-index.md \ lib/librte_eal/common/include \ lib/librte_eal/common/include/generic \ lib/librte_acl \ + lib/librte_cmdline \ lib/librte_distributor \ lib/librte_ether \ lib/librte_hash \ diff --git a/lib/librte_cmdline/cmdline.c b/lib/librte_cmdline/cmdline.c index e61c4f2..6a55f1f 100644 --- a/lib/librte_cmdline/cmdline.c +++ b/lib/librte_cmdline/cmdline.c @@ -65,6 +65,7 @@ #include #include #include +#include #include #include #include @@ -246,6 +247,40 @@ cmdline_quit(struct cmdline *cl) rdline_quit(&cl->rdl); } +int +cmdline_poll(struct cmdline *cl) +{ + struct pollfd pfd; + int status; + ssize_t read_status; + char c; + + if (!cl) + return -EINVAL; + else if (cl->rdl.status == RDLINE_EXITED) + return RDLINE_EXITED; + + pfd.fd = cl->s_in; + pfd.events = POLLIN; + pfd.revents = 0; + + status = poll(&pfd, 1, 0); + if (status < 0) + return status; + else if (status > 0) { + c = -1; + read_status = read(cl->s_in, &c, 1); + if (read_status < 0) + return read_status; + + status = cmdline_in(cl, &c, 1); + if (status < 0 && cl->rdl.status != RDLINE_EXITED) + return status; + } + + return cl->rdl.status; +} + void cmdline_interact(struct cmdline *cl) { diff --git a/lib/librte_cmdline/cmdline.h b/lib/librte_cmdline/cmdline.h index 9085ff6..2578ca8 100644 --- a/lib/librte_cmdline/cmdline.h +++ b/lib/librte_cmdline/cmdline.h @@ -64,6 +64,12 @@ #include #include +/** + * @file + * + * Command line API + */ + #ifdef __cplusplus extern "C" { #endif @@ -84,6 
+90,21 @@ void cmdline_printf(const struct cmdline *cl, const char *fmt, ...) __attribute__((format(printf,2,3))); int cmdline_in(struct cmdline *cl, const char *buf, int size); int cmdline_write_char(struct rdline *rdl, char c); + +/** + * This function is nonblocking equivalent of ``cmdline_interact()``. It polls + * *cl* for one character and interpret it. If return value is *RDLINE_EXITED* + * it mean that ``cmdline_quit()`` was invoked. + * + * @param cl + * The command line object. + * + * @return + * On success return object status - one of *enum rdline_status*. + * On error return negative value. + */ +int cmdline_poll(struct cmdline *cl); + void cmdline_interact(struct cmdline *cl); void cmdline_quit(struct cmdline *cl); diff --git a/lib/librte_cmdline/rte_cmdline_version.map b/lib/librte_cmdline/rte_cmdline_version.map index 6193462..1b0c863 100644 --- a/lib/librte_cmdline/rte_cmdline_version.map +++ b/lib/librte_cmdline/rte_cmdline_version.map @@ -69,3 +69,11 @@ DPDK_2.0 { local: *; }; + +DPDK_2.1 { + global: + + cmdline_poll; + + local: *; +} DPDK_2.0; -- 1.9.1
[dpdk-dev] [PATCH 0/2] bond: add mode 4 support
This patch set adds support for mode 4 to the link bonding pmd. It also introduces some minor changes to the original pmd driver to ease the integration of mode 4. This patchset depends on Declan Doherty's patch set: http://dpdk.org/ml/archives/dev/2014-September/005069.html Pawel Wodkowski (2): bond: Extract common code to separate functions bond: Add mode4 (802.3AX) lib/librte_ether/rte_ether.h |1 + lib/librte_pmd_bond/Makefile |1 + lib/librte_pmd_bond/rte_eth_bond.h |4 + lib/librte_pmd_bond/rte_eth_bond_8023ad.c | 1064 lib/librte_pmd_bond/rte_eth_bond_8023ad.h | 411 +++ lib/librte_pmd_bond/rte_eth_bond_api.c | 79 ++- lib/librte_pmd_bond/rte_eth_bond_args.c|1 + lib/librte_pmd_bond/rte_eth_bond_pmd.c | 224 +- lib/librte_pmd_bond/rte_eth_bond_private.h | 37 +- 9 files changed, 1781 insertions(+), 41 deletions(-) create mode 100644 lib/librte_pmd_bond/rte_eth_bond_8023ad.c create mode 100644 lib/librte_pmd_bond/rte_eth_bond_8023ad.h -- 1.7.9.5
[dpdk-dev] [PATCH 1/2] bond: extract common code to separate functions
Signed-off-by: Pawel Wodkowski Reviewed-by: Declan Doherty --- lib/librte_pmd_bond/rte_eth_bond_api.c | 59 +--- lib/librte_pmd_bond/rte_eth_bond_pmd.c | 47 ++ lib/librte_pmd_bond/rte_eth_bond_private.h | 30 -- 3 files changed, 102 insertions(+), 34 deletions(-) diff --git a/lib/librte_pmd_bond/rte_eth_bond_api.c b/lib/librte_pmd_bond/rte_eth_bond_api.c index dd33119..460df65 100644 --- a/lib/librte_pmd_bond/rte_eth_bond_api.c +++ b/lib/librte_pmd_bond/rte_eth_bond_api.c @@ -31,6 +31,8 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include + #include #include #include @@ -104,6 +106,39 @@ valid_slave_port_id(uint8_t port_id) return 0; } +void +rte_eth_bond_activate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id ) +{ + struct bond_dev_private *internals = eth_dev->data->dev_private; + uint8_t active_count = internals->active_slave_count; + + internals->active_slaves[active_count] = port_id; + + + internals->active_slave_count = active_count + 1; +} + +void +rte_eth_bond_deactive_slave(struct rte_eth_dev *eth_dev, + uint8_t slave_pos ) +{ + struct bond_dev_private *internals = eth_dev->data->dev_private; + uint8_t active_count = internals->active_slave_count; + + active_count--; + + /* If slave was not at the end of the list +* shift active slaves up active array list */ + if (slave_pos < active_count) { + memmove(internals->active_slaves + slave_pos, + internals->active_slaves + slave_pos + 1, + (active_count - slave_pos) * + sizeof(internals->active_slaves[0])); + } + + internals->active_slave_count = active_count; +} + uint8_t number_of_sockets(void) { @@ -356,10 +391,8 @@ rte_eth_bond_slave_add(uint8_t bonded_port_id, uint8_t slave_port_id) if (bonded_eth_dev->data->dev_started) { rte_eth_link_get_nowait(slave_port_id, &link_props); -if (link_props.link_status == 1) { - internals->active_slaves[internals->active_slave_count++] = - slave_port_id; - } +if (link_props.link_status == 1) + rte_eth_bond_activate_slave(bonded_eth_dev, 
slave_port_id); } return 0; @@ -373,6 +406,7 @@ err_add: int rte_eth_bond_slave_remove(uint8_t bonded_port_id, uint8_t slave_port_id) { + struct rte_eth_dev *eth_dev; struct bond_dev_private *internals; struct slave_conf *slave_conf; @@ -386,20 +420,15 @@ rte_eth_bond_slave_remove(uint8_t bonded_port_id, uint8_t slave_port_id) if (valid_slave_port_id(slave_port_id) != 0) goto err_del; - internals = rte_eth_devices[bonded_port_id].data->dev_private; + eth_dev = &rte_eth_devices[bonded_port_id]; + internals = eth_dev->data->dev_private; /* first remove from active slave list */ - for (i = 0; i < internals->active_slave_count; i++) { - if (internals->active_slaves[i] == slave_port_id) - pos = i; - - /* shift active slaves up active array list */ - if (pos >= 0 && i < (internals->active_slave_count - 1)) - internals->active_slaves[i] = internals->active_slaves[i+1]; - } + pos = find_slave_by_id(internals->active_slaves, internals->active_slave_count, + slave_port_id); - if (pos >= 0) - internals->active_slave_count--; + if (pos < internals->active_slave_count) + rte_eth_bond_deactive_slave(eth_dev, pos); pos = -1; /* now remove from slave list */ diff --git a/lib/librte_pmd_bond/rte_eth_bond_pmd.c b/lib/librte_pmd_bond/rte_eth_bond_pmd.c index 38cc1ae..482ddb8 100644 --- a/lib/librte_pmd_bond/rte_eth_bond_pmd.c +++ b/lib/librte_pmd_bond/rte_eth_bond_pmd.c @@ -447,6 +447,27 @@ link_properties_valid(struct rte_eth_link *bonded_dev_link, } int +mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr) +{ + struct ether_addr *mac_addr; + + mac_addr = eth_dev->data->mac_addrs; + + if (eth_dev == NULL) { + RTE_LOG(ERR, PMD, "%s: NULL pointer eth_dev specified\n", __func__); + return -1; + } + + if (dst_mac_addr == NULL) { + RTE_LOG(ERR, PMD, "%s: NULL pointer MAC specified\n", __func__); + return -1; + } + + ether_addr_copy(mac_addr, dst_mac_addr); + return 0; +} + +int mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new
[dpdk-dev] [PATCH 2/2] bond: add mode 4 support
Signed-off-by: Pawel Wodkowski Signed-off-by: Maciej T Gajdzica Reviewed-by: Declan Doherty --- lib/librte_ether/rte_ether.h |1 + lib/librte_pmd_bond/Makefile |1 + lib/librte_pmd_bond/rte_eth_bond.h |4 + lib/librte_pmd_bond/rte_eth_bond_8023ad.c | 1064 lib/librte_pmd_bond/rte_eth_bond_8023ad.h | 411 +++ lib/librte_pmd_bond/rte_eth_bond_api.c | 28 +- lib/librte_pmd_bond/rte_eth_bond_args.c|1 + lib/librte_pmd_bond/rte_eth_bond_pmd.c | 179 - lib/librte_pmd_bond/rte_eth_bond_private.h |9 +- 9 files changed, 1685 insertions(+), 13 deletions(-) create mode 100644 lib/librte_pmd_bond/rte_eth_bond_8023ad.c create mode 100644 lib/librte_pmd_bond/rte_eth_bond_8023ad.h diff --git a/lib/librte_ether/rte_ether.h b/lib/librte_ether/rte_ether.h index 2e08f23..1a3711b 100644 --- a/lib/librte_ether/rte_ether.h +++ b/lib/librte_ether/rte_ether.h @@ -293,6 +293,7 @@ struct vlan_hdr { #define ETHER_TYPE_RARP 0x8035 /**< Reverse Arp Protocol. */ #define ETHER_TYPE_VLAN 0x8100 /**< IEEE 802.1Q VLAN tagging. */ #define ETHER_TYPE_1588 0x88F7 /**< IEEE 802.1AS 1588 Precise Time Protocol. */ +#define ETHER_TYPE_SLOW 0x8809 /**< Slow protocols (LACP and Marker). */ #ifdef __cplusplus } diff --git a/lib/librte_pmd_bond/Makefile b/lib/librte_pmd_bond/Makefile index 953d75e..c2312c2 100644 --- a/lib/librte_pmd_bond/Makefile +++ b/lib/librte_pmd_bond/Makefile @@ -44,6 +44,7 @@ CFLAGS += $(WERROR_FLAGS) # SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_api.c SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_pmd.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_8023ad.c SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_args.c # diff --git a/lib/librte_pmd_bond/rte_eth_bond.h b/lib/librte_pmd_bond/rte_eth_bond.h index bd59780..6aac4ec 100644 --- a/lib/librte_pmd_bond/rte_eth_bond.h +++ b/lib/librte_pmd_bond/rte_eth_bond.h @@ -75,6 +75,10 @@ extern "C" { /**< Broadcast (Mode 3). * In this mode all transmitted packets will be transmitted on all available * active slaves of the bonded. 
*/ +#define BONDING_MODE_8023AD(4) +/**< 802.3AD (Mode 4). + * In this mode transmission and reception of packets is managed by LACP + * protocol specified in 802.3AD documentation. */ /* Balance Mode Transmit Policies */ #define BALANCE_XMIT_POLICY_LAYER2 (0) diff --git a/lib/librte_pmd_bond/rte_eth_bond_8023ad.c b/lib/librte_pmd_bond/rte_eth_bond_8023ad.c new file mode 100644 index 000..6ce6efb --- /dev/null +++ b/lib/librte_pmd_bond/rte_eth_bond_8023ad.c @@ -0,0 +1,1064 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include +#include +#include + +#include "rte_eth_bond_private.h" +#include "rte_eth_bond_8023ad.h" + +#include + +#define RTE_LIBRTE_BOND_DEBUG_8023AX + +#ifdef RTE_LIBRTE_BOND_DEBUG_8023AX +#define BOND_ASSERT(expr) \ + ((expr) ? (void) (0) \ + : rte_panic("%s(%d): assertion failed" __STRING(expr), __FILE__, __LINE__)) +#else +#define BOND_ASSERT(expr) do { } while (0) +#endif + +#ifdef RTE_LIBRTE_BOND_DEBUG_8023AX +#define _PORT_ID internals->active_slaves[port_num] +#d
[dpdk-dev] [PATCH v2] bond: Add mode 4 support.
This patch adds support mode 4 of link bonding. It depend on Delcan Doherty patches v3 and rte alarms patch v2 or above. New version handles race issues with setting/cancelin callbacks, fixes promiscus mode setting in mode 4 and some other minor errors in mode 4 implementation. Signed-off-by: Pawel Wodkowski --- lib/librte_ether/rte_ether.h |1 + lib/librte_pmd_bond/Makefile |1 + lib/librte_pmd_bond/rte_eth_bond.h |4 + lib/librte_pmd_bond/rte_eth_bond_api.c | 82 ++--- lib/librte_pmd_bond/rte_eth_bond_args.c|1 + lib/librte_pmd_bond/rte_eth_bond_pmd.c | 261 +--- lib/librte_pmd_bond/rte_eth_bond_private.h | 42 - 7 files changed, 346 insertions(+), 46 deletions(-) diff --git a/lib/librte_ether/rte_ether.h b/lib/librte_ether/rte_ether.h index 2e08f23..1a3711b 100644 --- a/lib/librte_ether/rte_ether.h +++ b/lib/librte_ether/rte_ether.h @@ -293,6 +293,7 @@ struct vlan_hdr { #define ETHER_TYPE_RARP 0x8035 /**< Reverse Arp Protocol. */ #define ETHER_TYPE_VLAN 0x8100 /**< IEEE 802.1Q VLAN tagging. */ #define ETHER_TYPE_1588 0x88F7 /**< IEEE 802.1AS 1588 Precise Time Protocol. */ +#define ETHER_TYPE_SLOW 0x8809 /**< Slow protocols (LACP and Marker). */ #ifdef __cplusplus } diff --git a/lib/librte_pmd_bond/Makefile b/lib/librte_pmd_bond/Makefile index 953d75e..c2312c2 100644 --- a/lib/librte_pmd_bond/Makefile +++ b/lib/librte_pmd_bond/Makefile @@ -44,6 +44,7 @@ CFLAGS += $(WERROR_FLAGS) # SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_api.c SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_pmd.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_8023ad.c SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_args.c # diff --git a/lib/librte_pmd_bond/rte_eth_bond.h b/lib/librte_pmd_bond/rte_eth_bond.h index 6811c7b..b0223c2 100644 --- a/lib/librte_pmd_bond/rte_eth_bond.h +++ b/lib/librte_pmd_bond/rte_eth_bond.h @@ -75,6 +75,10 @@ extern "C" { /**< Broadcast (Mode 3). * In this mode all transmitted packets will be transmitted on all available * active slaves of the bonded. 
*/ +#define BONDING_MODE_8023AD(4) +/**< 802.3AD (Mode 4). + * In this mode transmission and reception of packets is managed by LACP + * protocol specified in 802.3AD documentation. */ /* Balance Mode Transmit Policies */ #define BALANCE_XMIT_POLICY_LAYER2 (0) diff --git a/lib/librte_pmd_bond/rte_eth_bond_api.c b/lib/librte_pmd_bond/rte_eth_bond_api.c index c690ceb..c547164 100644 --- a/lib/librte_pmd_bond/rte_eth_bond_api.c +++ b/lib/librte_pmd_bond/rte_eth_bond_api.c @@ -31,6 +31,8 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include + #include #include #include @@ -104,6 +106,44 @@ valid_slave_port_id(uint8_t port_id) return 0; } +void +activate_slave(struct rte_eth_dev *eth_dev, uint8_t port_id) +{ + struct bond_dev_private *internals = eth_dev->data->dev_private; + uint8_t active_count = internals->active_slave_count; + + internals->active_slaves[active_count] = port_id; + + if (internals->mode == BONDING_MODE_8023AD) + bond_mode_8023ad_slave_append(eth_dev); + + internals->active_slave_count = active_count + 1; +} + +void +deactivate_slave(struct rte_eth_dev *eth_dev, + uint8_t slave_pos) +{ + struct bond_dev_private *internals = eth_dev->data->dev_private; + uint8_t active_count = internals->active_slave_count; + + if (internals->mode == BONDING_MODE_8023AD) + bond_mode_8023ad_deactivate_slave(eth_dev, slave_pos); + + active_count--; + + /* If slave was not at the end of the list +* shift active slaves up active array list */ + if (slave_pos < active_count) { + memmove(internals->active_slaves + slave_pos, + internals->active_slaves + slave_pos + 1, + (active_count - slave_pos) * + sizeof(internals->active_slaves[0])); + } + + internals->active_slave_count = active_count; +} + uint8_t number_of_sockets(void) { @@ -216,12 +256,8 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id) eth_dev->dev_ops = &default_dev_ops; eth_dev->pci_dev = pci_dev; - if (bond_ethdev_mode_set(eth_dev, mode)) { - 
RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode too %d", -eth_dev->data->port_id, mode); - goto err; - } - + internals->port_id = eth_dev->data->port_id; + internals->mode = BONDING_MODE_INVALID; internals->current_primary_port = 0; internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2; internals->user_defined_mac =
[dpdk-dev] [PATCH v2] bond: Add mode 4 support.
This patch adds support for mode 4 of link bonding. It depends on Declan Doherty's patches v3 and rte alarms patch v2 or above. The new version handles race issues with setting/cancelling callbacks, fixes promiscuous mode setting in mode 4 and some other minor errors in the mode 4 implementation. Signed-off-by: Pawel Wodkowski --- lib/librte_ether/rte_ether.h |1 + lib/librte_pmd_bond/Makefile |1 + lib/librte_pmd_bond/rte_eth_bond.h |4 + lib/librte_pmd_bond/rte_eth_bond_8023ad.c | 1070 lib/librte_pmd_bond/rte_eth_bond_8023ad.h | 405 +++ lib/librte_pmd_bond/rte_eth_bond_api.c | 82 ++- lib/librte_pmd_bond/rte_eth_bond_args.c|1 + lib/librte_pmd_bond/rte_eth_bond_pmd.c | 261 ++- lib/librte_pmd_bond/rte_eth_bond_private.h | 42 +- 9 files changed, 1821 insertions(+), 46 deletions(-) create mode 100644 lib/librte_pmd_bond/rte_eth_bond_8023ad.c create mode 100644 lib/librte_pmd_bond/rte_eth_bond_8023ad.h diff --git a/lib/librte_ether/rte_ether.h b/lib/librte_ether/rte_ether.h index 2e08f23..1a3711b 100644 --- a/lib/librte_ether/rte_ether.h +++ b/lib/librte_ether/rte_ether.h @@ -293,6 +293,7 @@ struct vlan_hdr { #define ETHER_TYPE_RARP 0x8035 /**< Reverse Arp Protocol. */ #define ETHER_TYPE_VLAN 0x8100 /**< IEEE 802.1Q VLAN tagging. */ #define ETHER_TYPE_1588 0x88F7 /**< IEEE 802.1AS 1588 Precise Time Protocol. */ +#define ETHER_TYPE_SLOW 0x8809 /**< Slow protocols (LACP and Marker). 
*/ #ifdef __cplusplus } diff --git a/lib/librte_pmd_bond/Makefile b/lib/librte_pmd_bond/Makefile index 953d75e..c2312c2 100644 --- a/lib/librte_pmd_bond/Makefile +++ b/lib/librte_pmd_bond/Makefile @@ -44,6 +44,7 @@ CFLAGS += $(WERROR_FLAGS) # SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_api.c SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_pmd.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_8023ad.c SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_args.c # diff --git a/lib/librte_pmd_bond/rte_eth_bond.h b/lib/librte_pmd_bond/rte_eth_bond.h index 6811c7b..b0223c2 100644 --- a/lib/librte_pmd_bond/rte_eth_bond.h +++ b/lib/librte_pmd_bond/rte_eth_bond.h @@ -75,6 +75,10 @@ extern "C" { /**< Broadcast (Mode 3). * In this mode all transmitted packets will be transmitted on all available * active slaves of the bonded. */ +#define BONDING_MODE_8023AD(4) +/**< 802.3AD (Mode 4). + * In this mode transmission and reception of packets is managed by LACP + * protocol specified in 802.3AD documentation. */ /* Balance Mode Transmit Policies */ #define BALANCE_XMIT_POLICY_LAYER2 (0) diff --git a/lib/librte_pmd_bond/rte_eth_bond_8023ad.c b/lib/librte_pmd_bond/rte_eth_bond_8023ad.c new file mode 100644 index 000..de416c6 --- /dev/null +++ b/lib/librte_pmd_bond/rte_eth_bond_8023ad.c @@ -0,0 +1,1070 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include +#include +#include + +#include "rte_eth_bond_private.h" +#include "rte_eth_bond_8023ad.h" + +#include + +#ifdef RTE_LIBRTE_BOND_DEBUG_8023AD +#define MODE4_DEBUG(fmt, ...) RTE_LOG(DEBUG, PMD, "%6u [Port %u: %s] " fmt, \ +
[dpdk-dev] [PATCH 4/5] bond mode 4: allow external state machine
On 2015-04-06 19:01, Eric Kinzie wrote: Interesting patch. I will closer look at this tomorrow. For now I have first comments: > +static void bond_mode_8023ad_ext_periodic_cb(void *arg); > + > #ifdef RTE_LIBRTE_BOND_DEBUG_8023AD > #define MODE4_DEBUG(fmt, ...) RTE_LOG(DEBUG, PMD, "%6u [Port %u: %s] " fmt, > \ > bond_dbg_get_time_diff_ms(), slave_id, \ > @@ -1014,6 +1016,8 @@ bond_mode_8023ad_conf_get(struct rte_eth_dev *dev, > conf->tx_period_ms = mode4->tx_period_timeout / ms_ticks; > conf->update_timeout_ms = mode4->update_timeout_us / 1000; > conf->rx_marker_period_ms = mode4->rx_marker_timeout / ms_ticks; > + conf->slowrx_cb = mode4->slowrx_cb; > + conf->external_sm = mode4->external_sm; mode4->external_sm flag realy needed? Why do not use mode4->slowrx_cb as external state machine indicator? > } > > void > @@ -1035,6 +1039,8 @@ bond_mode_8023ad_setup(struct rte_eth_dev *dev, > conf->tx_period_ms = BOND_8023AD_TX_MACHINE_PERIOD_MS; > conf->rx_marker_period_ms = BOND_8023AD_RX_MARKER_PERIOD_MS; > conf->update_timeout_ms = BOND_MODE_8023AX_UPDATE_TIMEOUT_MS; > + conf->slowrx_cb = NULL; > + conf->external_sm = 0; > } > > mode4->fast_periodic_timeout = conf->fast_periodic_ms * ms_ticks; > @@ -1045,6 +1051,8 @@ bond_mode_8023ad_setup(struct rte_eth_dev *dev, > mode4->tx_period_timeout = conf->tx_period_ms * ms_ticks; > mode4->rx_marker_timeout = conf->rx_marker_period_ms * ms_ticks; > mode4->update_timeout_us = conf->update_timeout_ms * 1000; > + mode4->slowrx_cb = conf->slowrx_cb; > + mode4->external_sm = conf->external_sm; > } > > int > @@ -1062,6 +1070,13 @@ bond_mode_8023ad_enable(struct rte_eth_dev *bond_dev) > int > bond_mode_8023ad_start(struct rte_eth_dev *bond_dev) > { > + struct bond_dev_private *internals = bond_dev->data->dev_private; > + struct mode8023ad_private *mode4 = &internals->mode4; > + > + if (mode4->external_sm) > + return rte_eal_alarm_set(BOND_MODE_8023AX_UPDATE_TIMEOUT_MS * > 1000, > + &bond_mode_8023ad_ext_periodic_cb, bond_dev); > + > 
return rte_eal_alarm_set(BOND_MODE_8023AX_UPDATE_TIMEOUT_MS * 1000, > &bond_mode_8023ad_periodic_cb, bond_dev); > } > @@ -1069,6 +1084,13 @@ bond_mode_8023ad_start(struct rte_eth_dev *bond_dev) > void > bond_mode_8023ad_stop(struct rte_eth_dev *bond_dev) > { > + struct bond_dev_private *internals = bond_dev->data->dev_private; > + struct mode8023ad_private *mode4 = &internals->mode4; > + > + if (mode4->external_sm) { This is a bad idea. If bond_mode_8023ad_setup() is called, you might have two handlers running for a while. You should stop mode 4 by invoking bond_mode_8023ad_stop() before you set mode4->external_sm and then, if mode 4 was running, start it again. Also, maybe a renaming "external_sm" to "state_machine_cb", set it to against default one and using it without "if()" will simplify code. It is no crucial but will eliminate couple of if's. In rte_eth_bond_8023ad_ext_slowtx() you can compare it against default one. > + rte_eal_alarm_cancel(&bond_mode_8023ad_ext_periodic_cb, > bond_dev); > + return; > + } > rte_eal_alarm_cancel(&bond_mode_8023ad_periodic_cb, bond_dev); > } > > @@ -1215,3 +1237,156 @@ rte_eth_bond_8023ad_slave_info(uint8_t port_id, > uint8_t slave_id, > info->agg_port_id = port->aggregator_port_id; > return 0; > } -- Pawel
[dpdk-dev] [PATCH 4/5] bond mode 4: allow external state machine
On 2015-04-07 16:18, Pawel Wodkowski wrote: > > Also, maybe a renaming "external_sm" to "state_machine_cb", set it to > against default one and using it without "if()" will simplify code. It > is no crucial but will eliminate couple of if's. In > rte_eth_bond_8023ad_ext_slowtx() you can compare it against default one. Oh, I read what I wrote :) Please ignore that. -- Pawel
[dpdk-dev] [PATCH 1/5] bond: use existing enslaved device queues
On 2015-04-06 19:01, Eric Kinzie wrote: > If a device to be enslaved already has transmit and/or receive queues > allocated, use those and then create any additional queues that are > necessary. > > Signed-off-by: Eric Kinzie > --- > lib/librte_pmd_bond/rte_eth_bond_pmd.c |8 ++-- > 1 file changed, 6 insertions(+), 2 deletions(-) > > diff --git a/lib/librte_pmd_bond/rte_eth_bond_pmd.c > b/lib/librte_pmd_bond/rte_eth_bond_pmd.c > index c937e6b..4fd7d97 100644 > --- a/lib/librte_pmd_bond/rte_eth_bond_pmd.c > +++ b/lib/librte_pmd_bond/rte_eth_bond_pmd.c > @@ -1318,7 +1318,9 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev, > } > > /* Setup Rx Queues */ > - for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) { > + /* Use existing queues, if any */ > + for (q_id = slave_eth_dev->data->nb_rx_queues; > + q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) { > bd_rx_q = (struct bond_rx_queue > *)bonded_eth_dev->data->rx_queues[q_id]; > > errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, > q_id, > @@ -1334,7 +1336,9 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev, > } > > /* Setup Tx Queues */ > - for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) { > + /* Use existing queues, if any */ > + for (q_id = slave_eth_dev->data->nb_tx_queues; > + q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) { > bd_tx_q = (struct bond_tx_queue > *)bonded_eth_dev->data->tx_queues[q_id]; > > errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, > q_id, > Why do you want to do that? As far as I am aware (but Declan Doherty should speak here too) the purpose of this part of the code is to keep the configuration of queues in slaves consistent with bd_rx_q/bd_tx_q. If you skip reconfiguration of queues that are already configured in the port, they can end up configured in a different way after enslaving. So again: what is the purpose of doing so? -- Pawel
[dpdk-dev] [PATCH 2/5] bond mode 4: copy entire config structure
On 2015-04-06 19:01, Eric Kinzie wrote: >Copy all needed fields from the mode8023ad_private structure in >bond_mode_8023ad_conf_get(). > > Signed-off-by: Eric Kinzie > --- > lib/librte_pmd_bond/rte_eth_bond_8023ad.c |1 + > 1 file changed, 1 insertion(+) > > diff --git a/lib/librte_pmd_bond/rte_eth_bond_8023ad.c > b/lib/librte_pmd_bond/rte_eth_bond_8023ad.c > index 97a828e..1009d5b 100644 > --- a/lib/librte_pmd_bond/rte_eth_bond_8023ad.c > +++ b/lib/librte_pmd_bond/rte_eth_bond_8023ad.c > @@ -1013,6 +1013,7 @@ bond_mode_8023ad_conf_get(struct rte_eth_dev *dev, > conf->aggregate_wait_timeout_ms = mode4->aggregate_wait_timeout / > ms_ticks; > conf->tx_period_ms = mode4->tx_period_timeout / ms_ticks; > conf->update_timeout_ms = mode4->update_timeout_us / 1000; > + conf->rx_marker_period_ms = mode4->rx_marker_timeout / ms_ticks; > } > > void > This is a bugfix. Acked-by: Pawel Wodkowski -- Pawel
[dpdk-dev] [PATCH 3/5] bond mode 4: do not ignore multicast
On 2015-04-06 19:01, Eric Kinzie wrote: > if (unlikely(hdr->ether_type == ether_type_slow_be || > !collecting || (!promisc && > - !is_same_ether_addr(&bond_mac, > &hdr->d_addr { > + (!is_multicast_ether_addr(&hdr->d_addr) > && > + !is_same_ether_addr(&bond_mac, > &hdr->d_addr) { > You can drop the extra parentheses here, but besides that I think it is OK. It should be marked as a bugfix. Acked-by: Pawel Wodkowski
[dpdk-dev] [PATCH 4/5] bond mode 4: allow external state machine
Hi Eric Please see my comments. On 2015-04-06 19:01, Eric Kinzie wrote: >Provide functions to allow an external 802.3ad state machine to transmit >and recieve LACPDUs and to set the collection/distribution flags on >slave interfaces. > > Signed-off-by: Eric Kinzie > --- > lib/librte_pmd_bond/rte_eth_bond_8023ad.c | 175 > + > lib/librte_pmd_bond/rte_eth_bond_8023ad.h | 44 ++ > lib/librte_pmd_bond/rte_eth_bond_8023ad_private.h |2 + > 3 files changed, 221 insertions(+) > > diff --git a/lib/librte_pmd_bond/rte_eth_bond_8023ad.c > b/lib/librte_pmd_bond/rte_eth_bond_8023ad.c > index 1009d5b..29cd962 100644 > --- a/lib/librte_pmd_bond/rte_eth_bond_8023ad.c > +++ b/lib/librte_pmd_bond/rte_eth_bond_8023ad.c > @@ -42,6 +42,8 @@ > > #include "rte_eth_bond_private.h" > > +static void bond_mode_8023ad_ext_periodic_cb(void *arg); > + > #ifdef RTE_LIBRTE_BOND_DEBUG_8023AD > #define MODE4_DEBUG(fmt, ...) RTE_LOG(DEBUG, PMD, "%6u [Port %u: %s] " fmt, > \ > bond_dbg_get_time_diff_ms(), slave_id, \ > @@ -1014,6 +1016,8 @@ bond_mode_8023ad_conf_get(struct rte_eth_dev *dev, > conf->tx_period_ms = mode4->tx_period_timeout / ms_ticks; > conf->update_timeout_ms = mode4->update_timeout_us / 1000; > conf->rx_marker_period_ms = mode4->rx_marker_timeout / ms_ticks; > + conf->slowrx_cb = mode4->slowrx_cb; > + conf->external_sm = mode4->external_sm; > } > > void > @@ -1035,6 +1039,8 @@ bond_mode_8023ad_setup(struct rte_eth_dev *dev, > conf->tx_period_ms = BOND_8023AD_TX_MACHINE_PERIOD_MS; > conf->rx_marker_period_ms = BOND_8023AD_RX_MARKER_PERIOD_MS; > conf->update_timeout_ms = BOND_MODE_8023AX_UPDATE_TIMEOUT_MS; > + conf->slowrx_cb = NULL; > + conf->external_sm = 0; > } > > mode4->fast_periodic_timeout = conf->fast_periodic_ms * ms_ticks; > @@ -1045,6 +1051,8 @@ bond_mode_8023ad_setup(struct rte_eth_dev *dev, > mode4->tx_period_timeout = conf->tx_period_ms * ms_ticks; > mode4->rx_marker_timeout = conf->rx_marker_period_ms * ms_ticks; > mode4->update_timeout_us = conf->update_timeout_ms * 1000; 
> + mode4->slowrx_cb = conf->slowrx_cb; > + mode4->external_sm = conf->external_sm; > } > > int > @@ -1062,6 +1070,13 @@ bond_mode_8023ad_enable(struct rte_eth_dev *bond_dev) > int > bond_mode_8023ad_start(struct rte_eth_dev *bond_dev) > { > + struct bond_dev_private *internals = bond_dev->data->dev_private; > + struct mode8023ad_private *mode4 = &internals->mode4; > + > + if (mode4->external_sm) > + return rte_eal_alarm_set(BOND_MODE_8023AX_UPDATE_TIMEOUT_MS * > 1000, > + &bond_mode_8023ad_ext_periodic_cb, bond_dev); > + > return rte_eal_alarm_set(BOND_MODE_8023AX_UPDATE_TIMEOUT_MS * 1000, > &bond_mode_8023ad_periodic_cb, bond_dev); > } > @@ -1069,6 +1084,13 @@ bond_mode_8023ad_start(struct rte_eth_dev *bond_dev) > void > bond_mode_8023ad_stop(struct rte_eth_dev *bond_dev) > { > + struct bond_dev_private *internals = bond_dev->data->dev_private; > + struct mode8023ad_private *mode4 = &internals->mode4; > + > + if (mode4->external_sm) { > + rte_eal_alarm_cancel(&bond_mode_8023ad_ext_periodic_cb, > bond_dev); > + return; > + } > rte_eal_alarm_cancel(&bond_mode_8023ad_periodic_cb, bond_dev); > } > > @@ -1215,3 +1237,156 @@ rte_eth_bond_8023ad_slave_info(uint8_t port_id, > uint8_t slave_id, > info->agg_port_id = port->aggregator_port_id; > return 0; > } > + > +int > +rte_eth_bond_8023ad_ext_collect(uint8_t port_id, uint8_t slave_id, int > enabled) > +{ > + struct rte_eth_dev *bond_dev; > + struct bond_dev_private *internals; > + struct mode8023ad_private *mode4; > + struct port *port; > + > + if (valid_bonded_port_id(port_id) != 0 || > + rte_eth_bond_mode_get(port_id) != BONDING_MODE_8023AD) The rte_eth_bond_mode_get() function already check if given port_id is valid bonded device so you can remove valid_bonded_port_id() here. You should check here is port is started. 
> + return -EINVAL; > + > + bond_dev = &rte_eth_devices[port_id]; > + > + internals = bond_dev->data->dev_private; > + if (find_slave_by_id(internals->active_slaves, > + internals->active_slave_count, slave_id) == > + internals->active_slave_count) > + return -EINVAL; > + > + mode4 = &internals->mode4; > + if (mode4->slowrx_cb == NULL || !mode4->external_sm) > + return -EINVAL; > + > + port = &mode_8023ad_ports[slave_id]; > + > + if (enabled) > + ACTOR_STATE_SET(port, COLLECTING); > + else > + ACTOR_STATE_CLR(port, COLLECTING); > + > + return 0; > +} > + > +int > +rte_eth_bond_8023ad_ext_distrib(uint8_t port_id, uint8
[dpdk-dev] [PATCH 5/5] bond mode 4: tests for external state machine
On 2015-04-06 19:01, Eric Kinzie wrote: > > +static void > +lacp_recv_cb(uint8_t slave_id, struct rte_mbuf *lacp_pkt) > +{ > + lacpdu_rx_count[slave_id]++; > + RTE_VERIFY(lacp_pkt != NULL); > + rte_pktmbuf_free(lacp_pkt); > +} > + It would be nice to check here whether it is a valid LACP packet. -- Pawel
[dpdk-dev] [PATCH] Clean up rte_memcpy.h file
On 2015-04-14 23:31, Ravi Kerur wrote: > + > + for (i = 0; i < 8; i++) { > + ymm = _mm256_loadu_si256((const __m256i *)(src + i * > 32)); > + _mm256_storeu_si256((__m256i *)(dst + i * 32), ymm); > + } > + > n -= 256; > - ymm1 = _mm256_loadu_si256((const __m256i *)((const uint8_t > *)src + 1 * 32)); > - ymm2 = _mm256_loadu_si256((const __m256i *)((const uint8_t > *)src + 2 * 32)); > - ymm3 = _mm256_loadu_si256((const __m256i *)((const uint8_t > *)src + 3 * 32)); > - ymm4 = _mm256_loadu_si256((const __m256i *)((const uint8_t > *)src + 4 * 32)); > - ymm5 = _mm256_loadu_si256((const __m256i *)((const uint8_t > *)src + 5 * 32)); > - ymm6 = _mm256_loadu_si256((const __m256i *)((const uint8_t > *)src + 6 * 32)); > - ymm7 = _mm256_loadu_si256((const __m256i *)((const uint8_t > *)src + 7 * 32)); > - src = (const uint8_t *)src + 256; > - _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 0 * 32), ymm0); > - _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 1 * 32), ymm1); > - _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 2 * 32), ymm2); > - _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 3 * 32), ymm3); > - _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 4 * 32), ymm4); > - _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 5 * 32), ymm5); > - _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 6 * 32), ymm6); > - _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 7 * 32), ymm7); > - dst = (uint8_t *)dst + 256; > + src = src + 256; > + dst = dst + 256; > } Did you perform a performance test on that part? -- Pawel
[dpdk-dev] cost of reading tsc register
On 2015-04-20 16:37, Ravi Kumar Iyer wrote: > Hi, > We were doing some code optimizations , running DPDK based applications, and > chanced upon the rte_rdtsc function [ to read tsc timestamp register value ] > consuming cpu cycles of the order of 100clock cycles with a delta of upto > 40cycles at times [ 60-140 cycles] > > We are actually building up a cpu intensive application which is also very > clock cycle sensitive and this is impacting our implementation. > > To validate the same using a small/vanilla application we wrote a small code > and tested on a single core. > Has anyone else faced a similar issue or are we doing something really > atrocious here. > > Below is the pseudo snip of the same: > > ... > for (i = 0; i < 8 ; i++) > { > g_tsc_cost[i] = rte_rdtsc(); >} ... > > uint64_t sc = rte_rdtsc(); /* start count */ > test_tsc_cost(); > uint64_t ec = rte_rdtsc(); /* end count */ > I am not an expert in this topic but I can share the knowledge I gained during the lib jobstats implementation (I think you can find it useful in your case with a small modification in getting the time). The rte_rdtsc() (it is a wrapper for the asm rdtsc instruction) is pretty useless in this particular use case. This instruction is pipelined and because of this you won't get a precise time. The same is true for rte_rdtsc_precise(). This one is a memory barrier followed by rte_rdtsc(). I was surprised that the compiler in most cases removes the memory barrier at '-Os' and '-O3', so the final code might not be different from rte_rdtsc(). There is no perfect solution for your problem. Assuming you want to measure pure code execution time you need to use the ... CPUID instruction :D together with RDTSC and RDTSCP. Yes, this is not a joke. The CPUID is some kind of barrier to out-of-order execution. 
Writing this in pseudo code: static inline uint64_t rte_rdtscp(void) { union { uint64_t tsc_64; struct { uint32_t lo_32; uint32_t hi_32; }; } tsc; #ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT /* What ever is needed here */ #endif asm volatile("rdtscp" : "=a" (tsc.lo_32), "=d" (tsc.hi_32)); return tsc.tsc_64; } uint_64_t timestamp_start(void) { /* Execution barier */ asm CPUID; return rte_rdtsc(); /* without 'p' */ } uint_64_t timestamp_get(void) { /* Execution barier */ uint64_t time = rte_rdtscp(); /* without 'p' */ asm CPUID return time; } void do_some_task(void) { g_tsc_cost[i] = timestamp_get(); } /* warmup cache */ timestamp_start(); timestamp_start(); timestamp_start(); start_time = timestamp_start(); do_some_task(); end_time = timestamp_get() ... And some words about performance here: If you want to use it many times in code and measure intervals shorter than a few thousand cycles you will kill your application because of processor stalls at the CPUID and RDTSCP instructions, so use it wisely. During the l2_fwd_jobstats example implementation I tested those cases. With the original rte_rdtscp() the app was able to handle about 64B packets with 2x7.5GiB traffic/per core. When I used CPUID and RDTSCP to get "accurate" timestamps I got max 2x4.5GiB. So again: use it wisely. And one remark that is totally my own opinion: those CPUs are not designed to do very precise time measurements, because there is no easy way to implement it without a significant performance penalty. > > Just to compare, On few bare metal implementations of non-intel processors, > we are seeing the similar code print values with a delta of 3-4 cycles and > thus its becoming a bit difficult to digest as well. Grateful for any > help/guidance here. > I think you should also isolate the CPU from scheduling and use IRQ affinity to remove any unwanted interference from the system. -- Pawel
[dpdk-dev] [PATCH] Implement memcmp using AVX/SSE instructions
On 2015-04-22 17:33, Ravi Kerur wrote: > +/** > + * Compare bytes between two locations. The locations must not overlap. > + * > + * @note This is implemented as a macro, so it's address should not be taken > + * and care is needed as parameter expressions may be evaluated multiple > times. > + * > + * @param src_1 > + * Pointer to the first source of the data. > + * @param src_2 > + * Pointer to the second source of the data. > + * @param n > + * Number of bytes to compare. > + * @return > + * true if equal otherwise false. > + */ > +static inline bool > +rte_memcmp(const void *src_1, const void *src, > + size_t n) __attribute__((always_inline)); You are exposing this as a public API, so I think you should follow the description below or not call this _memcmp_ int memcmp(const void *s1, const void *s2, size_t n); The memcmp() function returns an integer less than, equal to, or greater than zero if the first n bytes of s1 is found, respectively, to be less than, to match, or be greater than the first n bytes of s2. -- Pawel
[dpdk-dev] [RFC][PATCH] vfio: allow to map other memory regions
Currently it is not possible to use memory that is not owned by DPDK to perform DMA. This scenarion might be used in vhost applications (like SPDK) where guest send its own memory table. To fill this gap provide API to allow registering arbitrary address in VFIO container. Signed-off-by: Pawel Wodkowski --- lib/librte_eal/linuxapp/eal/Makefile| 3 + lib/librte_eal/linuxapp/eal/eal_vfio.c | 127 lib/librte_eal/linuxapp/eal/eal_vfio.h | 10 ++ lib/librte_eal/linuxapp/eal/include/rte_iommu.h | 76 ++ lib/librte_eal/linuxapp/eal/rte_eal_version.map | 7 ++ 5 files changed, 206 insertions(+), 17 deletions(-) create mode 100644 lib/librte_eal/linuxapp/eal/include/rte_iommu.h diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile index 640afd0887de..f0d8ae6ab4a3 100644 --- a/lib/librte_eal/linuxapp/eal/Makefile +++ b/lib/librte_eal/linuxapp/eal/Makefile @@ -126,6 +126,9 @@ ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) CFLAGS_eal_thread.o += -Wno-return-type endif +SYMLINK-$(CONFIG_RTE_EXEC_ENV_LINUXAPP)-include = \ + include/rte_iommu.h + INC := rte_interrupts.h rte_kni_common.h rte_dom0_common.h SYMLINK-$(CONFIG_RTE_EXEC_ENV_LINUXAPP)-include/exec-env := \ diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c index 53ac725d22e0..549c9824fdd7 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio.c +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "eal_filesystem.h" #include "eal_vfio.h" @@ -50,17 +51,19 @@ static struct vfio_config vfio_cfg; static int vfio_type1_dma_map(int); +static int vfio_type1_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int); static int vfio_spapr_dma_map(int); static int vfio_noiommu_dma_map(int); +static int vfio_noiommu_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int); /* IOMMU types we support */ static const struct vfio_iommu_type iommu_types[] = { /* x86 IOMMU, otherwise known as type 1 */ - { RTE_VFIO_TYPE1, "Type 1", 
&vfio_type1_dma_map}, + { RTE_VFIO_TYPE1, "Type 1", &vfio_type1_dma_map, &vfio_type1_dma_mem_map}, /* ppc64 IOMMU, otherwise known as spapr */ - { RTE_VFIO_SPAPR, "sPAPR", &vfio_spapr_dma_map}, + { RTE_VFIO_SPAPR, "sPAPR", &vfio_spapr_dma_map, NULL}, /* IOMMU-less mode */ - { RTE_VFIO_NOIOMMU, "No-IOMMU", &vfio_noiommu_dma_map}, + { RTE_VFIO_NOIOMMU, "No-IOMMU", &vfio_noiommu_dma_map, &vfio_noiommu_dma_mem_map}, }; int @@ -378,6 +381,8 @@ vfio_setup_device(const char *sysfs_base, const char *dev_addr, clear_group(vfio_group_fd); return -1; } + + vfio_cfg.vfio_iommu_type = t; } } @@ -690,33 +695,61 @@ vfio_get_group_no(const char *sysfs_base, } static int -vfio_type1_dma_map(int vfio_container_fd) +vfio_type1_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova, + uint64_t len, int do_map) { - const struct rte_memseg *ms = rte_eal_get_physmem_layout(); - int i, ret; - - /* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */ - for (i = 0; i < RTE_MAX_MEMSEG; i++) { - struct vfio_iommu_type1_dma_map dma_map; - - if (ms[i].addr == NULL) - break; + struct vfio_iommu_type1_dma_map dma_map; + struct vfio_iommu_type1_dma_unmap dma_unmap; + int ret; + if (do_map != 0) { memset(&dma_map, 0, sizeof(dma_map)); dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map); - dma_map.vaddr = ms[i].addr_64; - dma_map.size = ms[i].len; - dma_map.iova = ms[i].phys_addr; + dma_map.vaddr = vaddr; + dma_map.size = len; + dma_map.iova = iova; dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map); - if (ret) { RTE_LOG(ERR, EAL, " cannot set up DMA remapping, " "error %i (%s)\n", errno, strerror(errno)); return -1; } + + } else { + memset(&dma_unmap, 0, sizeof(dma_unmap)); + dma_unmap.argsz = sizeof(struct vfio_iommu_type1_dma_unmap); + dma_unmap.size = len; + dma_unmap.iova = iova; + + ret = ioctl(vfio_container_fd, VFIO_IOMMU_UNMAP_DMA, &dma_unmap); + i
[dpdk-dev] [PATCH v2] vfio: allow to map other memory regions
Currently it is not possible to use memory that is not owned by DPDK to perform DMA. This scenarion might be used in vhost applications (like SPDK) where guest send its own memory table. To fill this gap provide API to allow registering arbitrary address in VFIO container. Change-Id: Ic1f56e850cfdaa48eec02a8ee400e4a66f32892a Signed-off-by: Pawel Wodkowski --- Changes in v2: - Fix syntax error in version map file - Add note for multiprocess - Change iommu_types to use C99 designated initializers --- lib/librte_eal/linuxapp/eal/Makefile| 3 + lib/librte_eal/linuxapp/eal/eal_vfio.c | 142 +--- lib/librte_eal/linuxapp/eal/eal_vfio.h | 10 ++ lib/librte_eal/linuxapp/eal/include/rte_iommu.h | 78 + lib/librte_eal/linuxapp/eal/rte_eal_version.map | 8 ++ 5 files changed, 224 insertions(+), 17 deletions(-) create mode 100644 lib/librte_eal/linuxapp/eal/include/rte_iommu.h diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile index 640afd0887de..f0d8ae6ab4a3 100644 --- a/lib/librte_eal/linuxapp/eal/Makefile +++ b/lib/librte_eal/linuxapp/eal/Makefile @@ -126,6 +126,9 @@ ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) CFLAGS_eal_thread.o += -Wno-return-type endif +SYMLINK-$(CONFIG_RTE_EXEC_ENV_LINUXAPP)-include = \ + include/rte_iommu.h + INC := rte_interrupts.h rte_kni_common.h rte_dom0_common.h SYMLINK-$(CONFIG_RTE_EXEC_ENV_LINUXAPP)-include/exec-env := \ diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c index 53ac725d22e0..4e6cc4265a97 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio.c +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "eal_filesystem.h" #include "eal_vfio.h" @@ -50,17 +51,34 @@ static struct vfio_config vfio_cfg; static int vfio_type1_dma_map(int); +static int vfio_type1_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int); static int vfio_spapr_dma_map(int); static int vfio_noiommu_dma_map(int); +static int vfio_noiommu_dma_mem_map(int, 
uint64_t, uint64_t, uint64_t, int); /* IOMMU types we support */ static const struct vfio_iommu_type iommu_types[] = { /* x86 IOMMU, otherwise known as type 1 */ - { RTE_VFIO_TYPE1, "Type 1", &vfio_type1_dma_map}, + { + .type_id = RTE_VFIO_TYPE1, + .name = "Type 1", + .dma_map_func = &vfio_type1_dma_map, + .dma_user_map_func = &vfio_type1_dma_mem_map + }, /* ppc64 IOMMU, otherwise known as spapr */ - { RTE_VFIO_SPAPR, "sPAPR", &vfio_spapr_dma_map}, + { + .type_id = RTE_VFIO_SPAPR, + .name = "sPAPR", + .dma_map_func = &vfio_spapr_dma_map, + .dma_user_map_func = NULL + }, /* IOMMU-less mode */ - { RTE_VFIO_NOIOMMU, "No-IOMMU", &vfio_noiommu_dma_map}, + { + .type_id = RTE_VFIO_NOIOMMU, + .name = "No-IOMMU", + .dma_map_func = &vfio_noiommu_dma_map, + .dma_user_map_func = &vfio_noiommu_dma_mem_map + }, }; int @@ -378,6 +396,8 @@ vfio_setup_device(const char *sysfs_base, const char *dev_addr, clear_group(vfio_group_fd); return -1; } + + vfio_cfg.vfio_iommu_type = t; } } @@ -690,33 +710,61 @@ vfio_get_group_no(const char *sysfs_base, } static int -vfio_type1_dma_map(int vfio_container_fd) +vfio_type1_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova, + uint64_t len, int do_map) { - const struct rte_memseg *ms = rte_eal_get_physmem_layout(); - int i, ret; - - /* map all DPDK segments for DMA. 
use 1:1 PA to IOVA mapping */ - for (i = 0; i < RTE_MAX_MEMSEG; i++) { - struct vfio_iommu_type1_dma_map dma_map; - - if (ms[i].addr == NULL) - break; + struct vfio_iommu_type1_dma_map dma_map; + struct vfio_iommu_type1_dma_unmap dma_unmap; + int ret; + if (do_map != 0) { memset(&dma_map, 0, sizeof(dma_map)); dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map); - dma_map.vaddr = ms[i].addr_64; - dma_map.size = ms[i].len; - dma_map.iova = ms[i].phys_addr; + dma_map.vaddr = vaddr; + dma_map.size = len; + dma_map.iova = iova; dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map); - if (ret) { RTE_LOG(ERR, EAL, " cannot set up DMA remapping
[dpdk-dev] [PATCH v3] vfio: allow to map other memory regions
Currently it is not possible to use memory that is not owned by DPDK to perform DMA. This scenarion might be used in vhost applications (like SPDK) where guest send its own memory table. To fill this gap provide API to allow registering arbitrary address in VFIO container. Signed-off-by: Pawel Wodkowski --- Changes in v3: - Removed Gerrit Change-Id Changes in v2: - Fix syntax error in version map file - Add note for multiprocess - Change iommu_types to use C99 designated initializers --- lib/librte_eal/linuxapp/eal/Makefile| 3 + lib/librte_eal/linuxapp/eal/eal_vfio.c | 142 +--- lib/librte_eal/linuxapp/eal/eal_vfio.h | 10 ++ lib/librte_eal/linuxapp/eal/include/rte_iommu.h | 78 + lib/librte_eal/linuxapp/eal/rte_eal_version.map | 8 ++ 5 files changed, 224 insertions(+), 17 deletions(-) create mode 100644 lib/librte_eal/linuxapp/eal/include/rte_iommu.h diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile index 640afd0887de..f0d8ae6ab4a3 100644 --- a/lib/librte_eal/linuxapp/eal/Makefile +++ b/lib/librte_eal/linuxapp/eal/Makefile @@ -126,6 +126,9 @@ ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) CFLAGS_eal_thread.o += -Wno-return-type endif +SYMLINK-$(CONFIG_RTE_EXEC_ENV_LINUXAPP)-include = \ + include/rte_iommu.h + INC := rte_interrupts.h rte_kni_common.h rte_dom0_common.h SYMLINK-$(CONFIG_RTE_EXEC_ENV_LINUXAPP)-include/exec-env := \ diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c index 53ac725d22e0..4e6cc4265a97 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio.c +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "eal_filesystem.h" #include "eal_vfio.h" @@ -50,17 +51,34 @@ static struct vfio_config vfio_cfg; static int vfio_type1_dma_map(int); +static int vfio_type1_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int); static int vfio_spapr_dma_map(int); static int vfio_noiommu_dma_map(int); +static int vfio_noiommu_dma_mem_map(int, uint64_t, 
uint64_t, uint64_t, int); /* IOMMU types we support */ static const struct vfio_iommu_type iommu_types[] = { /* x86 IOMMU, otherwise known as type 1 */ - { RTE_VFIO_TYPE1, "Type 1", &vfio_type1_dma_map}, + { + .type_id = RTE_VFIO_TYPE1, + .name = "Type 1", + .dma_map_func = &vfio_type1_dma_map, + .dma_user_map_func = &vfio_type1_dma_mem_map + }, /* ppc64 IOMMU, otherwise known as spapr */ - { RTE_VFIO_SPAPR, "sPAPR", &vfio_spapr_dma_map}, + { + .type_id = RTE_VFIO_SPAPR, + .name = "sPAPR", + .dma_map_func = &vfio_spapr_dma_map, + .dma_user_map_func = NULL + }, /* IOMMU-less mode */ - { RTE_VFIO_NOIOMMU, "No-IOMMU", &vfio_noiommu_dma_map}, + { + .type_id = RTE_VFIO_NOIOMMU, + .name = "No-IOMMU", + .dma_map_func = &vfio_noiommu_dma_map, + .dma_user_map_func = &vfio_noiommu_dma_mem_map + }, }; int @@ -378,6 +396,8 @@ vfio_setup_device(const char *sysfs_base, const char *dev_addr, clear_group(vfio_group_fd); return -1; } + + vfio_cfg.vfio_iommu_type = t; } } @@ -690,33 +710,61 @@ vfio_get_group_no(const char *sysfs_base, } static int -vfio_type1_dma_map(int vfio_container_fd) +vfio_type1_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova, + uint64_t len, int do_map) { - const struct rte_memseg *ms = rte_eal_get_physmem_layout(); - int i, ret; - - /* map all DPDK segments for DMA. 
use 1:1 PA to IOVA mapping */ - for (i = 0; i < RTE_MAX_MEMSEG; i++) { - struct vfio_iommu_type1_dma_map dma_map; - - if (ms[i].addr == NULL) - break; + struct vfio_iommu_type1_dma_map dma_map; + struct vfio_iommu_type1_dma_unmap dma_unmap; + int ret; + if (do_map != 0) { memset(&dma_map, 0, sizeof(dma_map)); dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map); - dma_map.vaddr = ms[i].addr_64; - dma_map.size = ms[i].len; - dma_map.iova = ms[i].phys_addr; + dma_map.vaddr = vaddr; + dma_map.size = len; + dma_map.iova = iova; dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map); - if (ret) { RTE_LOG(ERR, EAL, " cannot set up DMA remapping
[dpdk-dev] [PATCH] vfio: allow to map other memory regions
Currently it is not possible to use memory that is not owned by DPDK to perform DMA. This scenarion might be used in vhost applications (like SPDK) where guest send its own memory table. To fill this gap provide API to allow registering arbitrary address in VFIO container. Signed-off-by: Pawel Wodkowski --- Changes in v4: - Fix syntax another error in version map file Changes in v3: - Removed Gerrit Change-Id Changes in v2: - Fix syntax error in version map file - Add note for multiprocess - Change iommu_types to use C99 designated initializers --- lib/librte_eal/linuxapp/eal/Makefile| 3 + lib/librte_eal/linuxapp/eal/eal_vfio.c | 142 +--- lib/librte_eal/linuxapp/eal/eal_vfio.h | 10 ++ lib/librte_eal/linuxapp/eal/include/rte_iommu.h | 78 + lib/librte_eal/linuxapp/eal/rte_eal_version.map | 8 ++ 5 files changed, 224 insertions(+), 17 deletions(-) create mode 100644 lib/librte_eal/linuxapp/eal/include/rte_iommu.h diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile index 640afd0887de..f0d8ae6ab4a3 100644 --- a/lib/librte_eal/linuxapp/eal/Makefile +++ b/lib/librte_eal/linuxapp/eal/Makefile @@ -126,6 +126,9 @@ ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) CFLAGS_eal_thread.o += -Wno-return-type endif +SYMLINK-$(CONFIG_RTE_EXEC_ENV_LINUXAPP)-include = \ + include/rte_iommu.h + INC := rte_interrupts.h rte_kni_common.h rte_dom0_common.h SYMLINK-$(CONFIG_RTE_EXEC_ENV_LINUXAPP)-include/exec-env := \ diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c index 53ac725d22e0..4e6cc4265a97 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio.c +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "eal_filesystem.h" #include "eal_vfio.h" @@ -50,17 +51,34 @@ static struct vfio_config vfio_cfg; static int vfio_type1_dma_map(int); +static int vfio_type1_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int); static int vfio_spapr_dma_map(int); static int 
vfio_noiommu_dma_map(int); +static int vfio_noiommu_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int); /* IOMMU types we support */ static const struct vfio_iommu_type iommu_types[] = { /* x86 IOMMU, otherwise known as type 1 */ - { RTE_VFIO_TYPE1, "Type 1", &vfio_type1_dma_map}, + { + .type_id = RTE_VFIO_TYPE1, + .name = "Type 1", + .dma_map_func = &vfio_type1_dma_map, + .dma_user_map_func = &vfio_type1_dma_mem_map + }, /* ppc64 IOMMU, otherwise known as spapr */ - { RTE_VFIO_SPAPR, "sPAPR", &vfio_spapr_dma_map}, + { + .type_id = RTE_VFIO_SPAPR, + .name = "sPAPR", + .dma_map_func = &vfio_spapr_dma_map, + .dma_user_map_func = NULL + }, /* IOMMU-less mode */ - { RTE_VFIO_NOIOMMU, "No-IOMMU", &vfio_noiommu_dma_map}, + { + .type_id = RTE_VFIO_NOIOMMU, + .name = "No-IOMMU", + .dma_map_func = &vfio_noiommu_dma_map, + .dma_user_map_func = &vfio_noiommu_dma_mem_map + }, }; int @@ -378,6 +396,8 @@ vfio_setup_device(const char *sysfs_base, const char *dev_addr, clear_group(vfio_group_fd); return -1; } + + vfio_cfg.vfio_iommu_type = t; } } @@ -690,33 +710,61 @@ vfio_get_group_no(const char *sysfs_base, } static int -vfio_type1_dma_map(int vfio_container_fd) +vfio_type1_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova, + uint64_t len, int do_map) { - const struct rte_memseg *ms = rte_eal_get_physmem_layout(); - int i, ret; - - /* map all DPDK segments for DMA. 
use 1:1 PA to IOVA mapping */ - for (i = 0; i < RTE_MAX_MEMSEG; i++) { - struct vfio_iommu_type1_dma_map dma_map; - - if (ms[i].addr == NULL) - break; + struct vfio_iommu_type1_dma_map dma_map; + struct vfio_iommu_type1_dma_unmap dma_unmap; + int ret; + if (do_map != 0) { memset(&dma_map, 0, sizeof(dma_map)); dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map); - dma_map.vaddr = ms[i].addr_64; - dma_map.size = ms[i].len; - dma_map.iova = ms[i].phys_addr; + dma_map.vaddr = vaddr; + dma_map.size = len; + dma_map.iova = iova; dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map); - if (ret) {
[dpdk-dev] [PATCH] eal: fix '--huge-unlink' option
The final_va field is set during remap_segment() but this information is not propagated to the temporary copy of the huge page memory configuration, so the unlink_hugepage_files() function wrongly assumes that there is nothing to unlink. Fix this issue by checking orig_va instead of final_va. Fixes: 66cc45e293ed ("mem: replace memseg with memseg lists") To: Anatoly Burakov Signed-off-by: Pawel Wodkowski --- lib/librte_eal/linuxapp/eal/eal_memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c index e3ac2481572d..b4a2b2b9e405 100644 --- a/lib/librte_eal/linuxapp/eal/eal_memory.c +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c @@ -585,7 +585,7 @@ unlink_hugepage_files(struct hugepage_file *hugepg_tbl, for (page = 0; page < nrpages; page++) { struct hugepage_file *hp = &hugepg_tbl[page]; - if (hp->final_va != NULL && unlink(hp->filepath)) { + if (hp->orig_va != NULL && unlink(hp->filepath)) { RTE_LOG(WARNING, EAL, "%s(): Removing %s failed: %s\n", __func__, hp->filepath, strerror(errno)); } -- 2.7.4
[dpdk-dev] [PATCH v2 0/4] Enable DCB in SRIOV mode for ixgbe driver
v2: - Split patch for easier review. - Remove "pmd: add api version negotiation for ixgbe driver" and "pmd: extend mailbox api to report number of RX/TX queues" patches as those are already merged from another patch v1: This patchset enables DCB in SRIOV (ETH_MQ_RX_VMDQ_DCB and ETH_MQ_TX_VMDQ_DCB) for each VF and PF for ixgbe driver. As a side effect this allows the use of multiple queues for TX in VF (8 if there are 16 or fewer VFs, or 4 if there are 32 or fewer VFs) when PFC is not enabled. Pawel Wodkowski (4): ethdev: Allow zero rx/tx queues in SRIOV mode ethdev: prevent changing of nb_q_per_pool in rte_eth_dev_check_mq_mode() pmd: add support for DCB in SRIOV mode for ixgbe driver. testpmd: fix dcb in vt mode app/test-pmd/cmdline.c|4 +-- app/test-pmd/testpmd.c| 39 +-- app/test-pmd/testpmd.h| 10 -- lib/librte_ether/rte_ethdev.c | 63 +++-- lib/librte_ether/rte_ethdev.h |2 +- lib/librte_pmd_ixgbe/ixgbe_pf.c | 42 ++--- lib/librte_pmd_ixgbe/ixgbe_rxtx.c |7 ++--- 7 files changed, 106 insertions(+), 61 deletions(-) -- 1.7.9.5
[dpdk-dev] [PATCH v2 1/4] ethdev: Allow zero rx/tx queues in SRIOV mode
Allow zero rx/tx queues to be passed to rte_eth_dev_configure(). This way the PF might be used only for configuration purposes when no receive and/or transmit functionality is needed. Rationale: in SRIOV mode the PF uses the first free VF for RX/TX (at least on ixgbe based NICs). For example: if using an 82599EB based NIC and the VF count is 16, 32 or 64, all resources are assigned to VFs so the PF might be used only for configuration purposes. Signed-off-by: Pawel Wodkowski --- lib/librte_ether/rte_ethdev.c | 15 +++ 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c index 077d430..62d7f6e 100644 --- a/lib/librte_ether/rte_ethdev.c +++ b/lib/librte_ether/rte_ethdev.c @@ -333,7 +333,7 @@ rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues) dev->data->rx_queues = rte_zmalloc("ethdev->rx_queues", sizeof(dev->data->rx_queues[0]) * nb_queues, RTE_CACHE_LINE_SIZE); - if (dev->data->rx_queues == NULL) { + if (dev->data->rx_queues == NULL && nb_queues > 0) { dev->data->nb_rx_queues = 0; return -(ENOMEM); } @@ -475,7 +475,7 @@ rte_eth_dev_tx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues) dev->data->tx_queues = rte_zmalloc("ethdev->tx_queues", sizeof(dev->data->tx_queues[0]) * nb_queues, RTE_CACHE_LINE_SIZE); - if (dev->data->tx_queues == NULL) { + if (dev->data->tx_queues == NULL && nb_queues > 0) { dev->data->nb_tx_queues = 0; return -(ENOMEM); } @@ -731,7 +731,10 @@ rte_eth_dev_configure(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, } if (nb_rx_q == 0) { PMD_DEBUG_TRACE("ethdev port_id=%d nb_rx_q == 0\n", port_id); - return (-EINVAL); + /* In SRIOV there can be no free resource for PF. So permit use only +* for configuration. 
*/ + if (RTE_ETH_DEV_SRIOV(dev).active == 0) + return (-EINVAL); } if (nb_tx_q > dev_info.max_tx_queues) { @@ -739,9 +742,13 @@ rte_eth_dev_configure(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, port_id, nb_tx_q, dev_info.max_tx_queues); return (-EINVAL); } + if (nb_tx_q == 0) { PMD_DEBUG_TRACE("ethdev port_id=%d nb_tx_q == 0\n", port_id); - return (-EINVAL); + /* In SRIOV there can be no free resource for PF. So permit use only +* for configuration. */ + if (RTE_ETH_DEV_SRIOV(dev).active == 0) + return (-EINVAL); } /* Copy the dev_conf parameter into the dev structure */ -- 1.7.9.5
[dpdk-dev] [PATCH v2 2/4] ethdev: prevent changing of nb_q_per_pool in rte_eth_dev_check_mq_mode()
If SRIOV is used and device configuration does not use MQ the RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool is set to 1 in rte_eth_dev_check_mq_mode(). This is bad because of two reasons: 1. Port reconfiguration from non-MQ mode to MQ mode is impossible 2. Configuring RX and TX side in different way is impossible. This patch fixes the first issue by not changing RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool and the second by comparing nb_q_per_pool separately for RX (nb_rx_q_per_pool) and for TX (nb_tx_q_per_pool). Signed-off-by: Pawel Wodkowski --- lib/librte_ether/rte_ethdev.c | 16 ++-- 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c index 62d7f6e..85385f8 100644 --- a/lib/librte_ether/rte_ethdev.c +++ b/lib/librte_ether/rte_ethdev.c @@ -548,6 +548,9 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, return (-EINVAL); } + uint16_t nb_rx_q_per_pool = RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool; + uint16_t nb_tx_q_per_pool = RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool; + switch (dev_conf->rxmode.mq_mode) { case ETH_MQ_RX_VMDQ_DCB: case ETH_MQ_RX_VMDQ_DCB_RSS: @@ -580,8 +583,8 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, default: /* ETH_MQ_RX_VMDQ_ONLY or ETH_MQ_RX_NONE */ /* if nothing mq mode configure, use default scheme */ dev->data->dev_conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_ONLY; - if (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool > 1) - RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = 1; + if (nb_rx_q_per_pool > 1) + nb_rx_q_per_pool = 1; break; } @@ -596,15 +599,16 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, default: /* ETH_MQ_TX_VMDQ_ONLY or ETH_MQ_TX_NONE */ /* if nothing mq mode configure, use default scheme */ dev->data->dev_conf.txmode.mq_mode = ETH_MQ_TX_VMDQ_ONLY; + if (nb_tx_q_per_pool > 1) + nb_tx_q_per_pool = 1; break; } /* check valid queue number */ - if ((nb_rx_q > RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool) || - (nb_tx_q > 
RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool)) { + if (nb_rx_q > nb_rx_q_per_pool || nb_tx_q > nb_tx_q_per_pool) { PMD_DEBUG_TRACE("ethdev port_id=%d SRIOV active, " - "queue number must less equal to %d\n", - port_id, RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool); + "rx/tx queue number must less or equal to %d/%d\n", + port_id, nb_rx_q_per_pool, nb_tx_q_per_pool); return (-EINVAL); } } else { -- 1.7.9.5
[dpdk-dev] [PATCH v2 3/4] pmd: add support for DCB in SRIOV mode for ixgbe driver.
Add support for DCB in SRIOV mode. When no PFC is enabled this feature might be used as multiple queues for VF (up to 8 queues if VFs num is less or equal 16 or 4 if FVs num is less or equal 32). The PF must initializes RX in ETH_MQ_RX_VMDQ_DCB and TX in ETH_MQ_TX_VMDQ_DCB. VF should initialize Rx in ETH_MQ_RX_DCB and Tx in ETH_MQ_TX_DCB to use multiple queues and/or DCB. Signed-off-by: Pawel Wodkowski --- lib/librte_ether/rte_ethdev.c | 32 lib/librte_ether/rte_ethdev.h |2 +- lib/librte_pmd_ixgbe/ixgbe_pf.c | 42 +++-- lib/librte_pmd_ixgbe/ixgbe_rxtx.c |7 +++ 4 files changed, 54 insertions(+), 29 deletions(-) diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c index 85385f8..115465e 100644 --- a/lib/librte_ether/rte_ethdev.c +++ b/lib/librte_ether/rte_ethdev.c @@ -532,6 +532,7 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, const struct rte_eth_conf *dev_conf) { struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + struct rte_eth_dev_info dev_info; if (RTE_ETH_DEV_SRIOV(dev).active != 0) { /* check multi-queue mode */ @@ -553,8 +554,9 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, switch (dev_conf->rxmode.mq_mode) { case ETH_MQ_RX_VMDQ_DCB: + break; case ETH_MQ_RX_VMDQ_DCB_RSS: - /* DCB/RSS VMDQ in SRIOV mode, not implement yet */ + /* DCB+RSS VMDQ in SRIOV mode, not implement yet */ PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8 " SRIOV active, " "unsupported VMDQ mq_mode rx %u\n", @@ -589,13 +591,8 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, } switch (dev_conf->txmode.mq_mode) { - case ETH_MQ_TX_VMDQ_DCB: - /* DCB VMDQ in SRIOV mode, not implement yet */ - PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8 - " SRIOV active, " - "unsupported VMDQ mq_mode tx %u\n", - port_id, dev_conf->txmode.mq_mode); - return (-EINVAL); + case ETH_MQ_TX_VMDQ_DCB: /* DCB VMDQ in SRIOV mode*/ + break; default: /* ETH_MQ_TX_VMDQ_ONLY or ETH_MQ_TX_NONE */ /* if nothing mq 
mode configure, use default scheme */ dev->data->dev_conf.txmode.mq_mode = ETH_MQ_TX_VMDQ_ONLY; @@ -612,7 +609,7 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, return (-EINVAL); } } else { - /* For vmdb+dcb mode check our configuration before we go further */ + /* For vmdq+dcb mode check our configuration before we go further */ if (dev_conf->rxmode.mq_mode == ETH_MQ_RX_VMDQ_DCB) { const struct rte_eth_vmdq_dcb_conf *conf; @@ -651,11 +648,20 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, } } - /* For DCB mode check our configuration before we go further */ + /* For DCB we need to obtain maximum number of queues dinamically, +* as this depends on max VF exported in PF */ + if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) || + (dev_conf->txmode.mq_mode == ETH_MQ_TX_DCB)) { + + FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP); + (*dev->dev_ops->dev_infos_get)(dev, &dev_info); + } + + /* For DCB mode check out configuration before we go further */ if (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) { const struct rte_eth_dcb_rx_conf *conf; - if (nb_rx_q != ETH_DCB_NUM_QUEUES) { + if (nb_rx_q != dev_info.max_rx_queues) { PMD_DEBUG_TRACE("ethdev port_id=%d DCB, nb_rx_q " "!= %d\n", port_id, ETH_DCB_NUM_QUEUES); @@ -675,7 +681,7 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, if (dev_conf->txmode.mq_mode == ETH_MQ_TX_DCB) { const struct rte_eth_dcb_tx_conf *conf; - if (nb_tx_q != ETH_DCB_NUM_QUEUES) { +
[dpdk-dev] [PATCH v2 4/4] testpmd: fix dcb in vt mode
This patch incorporate fixes to support DCB in SRIOV mode for testpmd. Signed-off-by: Pawel Wodkowski --- app/test-pmd/cmdline.c |4 ++-- app/test-pmd/testpmd.c | 39 +-- app/test-pmd/testpmd.h | 10 -- 3 files changed, 31 insertions(+), 22 deletions(-) diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c index 4618b92..d6a18a9 100644 --- a/app/test-pmd/cmdline.c +++ b/app/test-pmd/cmdline.c @@ -1947,9 +1947,9 @@ cmd_config_dcb_parsed(void *parsed_result, /* DCB in VT mode */ if (!strncmp(res->vt_en, "on",2)) - dcb_conf.dcb_mode = DCB_VT_ENABLED; + dcb_conf.vt_en = 1; else - dcb_conf.dcb_mode = DCB_ENABLED; + dcb_conf.vt_en = 0; if (!strncmp(res->pfc_en, "on",2)) { dcb_conf.pfc_en = 1; diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index 773b8af..9b12c25 100644 --- a/app/test-pmd/testpmd.c +++ b/app/test-pmd/testpmd.c @@ -1743,7 +1743,8 @@ const uint16_t vlan_tags[] = { }; static int -get_eth_dcb_conf(struct rte_eth_conf *eth_conf, struct dcb_config *dcb_conf) +get_eth_dcb_conf(struct rte_eth_conf *eth_conf, struct dcb_config *dcb_conf, + uint16_t sriov) { uint8_t i; @@ -1751,7 +1752,7 @@ get_eth_dcb_conf(struct rte_eth_conf *eth_conf, struct dcb_config *dcb_conf) * Builds up the correct configuration for dcb+vt based on the vlan tags array * given above, and the number of traffic classes available for use. 
*/ - if (dcb_conf->dcb_mode == DCB_VT_ENABLED) { + if (dcb_conf->vt_en == 1) { struct rte_eth_vmdq_dcb_conf vmdq_rx_conf; struct rte_eth_vmdq_dcb_tx_conf vmdq_tx_conf; @@ -1768,9 +1769,17 @@ get_eth_dcb_conf(struct rte_eth_conf *eth_conf, struct dcb_config *dcb_conf) vmdq_rx_conf.pool_map[i].vlan_id = vlan_tags[ i ]; vmdq_rx_conf.pool_map[i].pools = 1 << (i % vmdq_rx_conf.nb_queue_pools); } - for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) { - vmdq_rx_conf.dcb_queue[i] = i; - vmdq_tx_conf.dcb_queue[i] = i; + + if (sriov == 0) { + for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) { + vmdq_rx_conf.dcb_queue[i] = i; + vmdq_tx_conf.dcb_queue[i] = i; + } + } else { + for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) { + vmdq_rx_conf.dcb_queue[i] = i % dcb_conf->num_tcs; + vmdq_tx_conf.dcb_queue[i] = i % dcb_conf->num_tcs; + } } /*set DCB mode of RX and TX of multiple queues*/ @@ -1828,22 +1837,32 @@ init_port_dcb_config(portid_t pid,struct dcb_config *dcb_conf) uint16_t nb_vlan; uint16_t i; - /* rxq and txq configuration in dcb mode */ - nb_rxq = 128; - nb_txq = 128; rx_free_thresh = 64; + rte_port = &ports[pid]; memset(&port_conf,0,sizeof(struct rte_eth_conf)); /* Enter DCB configuration status */ dcb_config = 1; nb_vlan = sizeof( vlan_tags )/sizeof( vlan_tags[ 0 ]); /*set configuration of DCB in vt mode and DCB in non-vt mode*/ - retval = get_eth_dcb_conf(&port_conf, dcb_conf); + retval = get_eth_dcb_conf(&port_conf, dcb_conf, rte_port->dev_info.max_vfs); + + /* rxq and txq configuration in dcb mode */ + nb_rxq = rte_port->dev_info.max_rx_queues; + nb_txq = rte_port->dev_info.max_tx_queues; + + if (rte_port->dev_info.max_vfs) { + if (port_conf.rxmode.mq_mode == ETH_MQ_RX_VMDQ_DCB) + nb_rxq /= port_conf.rx_adv_conf.vmdq_dcb_conf.nb_queue_pools; + + if (port_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_DCB) + nb_txq /= port_conf.tx_adv_conf.vmdq_dcb_tx_conf.nb_queue_pools; + } + if (retval < 0) return retval; - rte_port = &ports[pid]; memcpy(&rte_port->dev_conf, 
&port_conf,sizeof(struct rte_eth_conf)); rte_port->rx_conf.rx_thresh = rx_thresh; diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h index 8f5e6c7..695e893 100644 --- a/app/test-pmd/testpmd.h +++ b/app/test-pmd/testpmd.h @@ -227,20 +227,10 @@ struct fwd_config { portid_t nb_fwd_ports;/**< Nb. of ports involved. */ }; -/** - * DCB mode enable - */ -enum dcb_mode_enable -{ - DCB_VT_ENABLED, - DCB_ENABLED -}; - /* * DCB general config info */ struct dcb_config { - enum dcb_mode_enable dcb_mode; uint8_t vt_en; enum rte_eth_nb_tcs num_tcs; uint8_t pfc_en; -- 1.7.9.5
[dpdk-dev] [PATCH] Added missing extern 'C' decls in mode4 header files
Signed-off-by: Pawel Wodkowski --- lib/librte_pmd_bond/rte_eth_bond_8023ad.h |8 lib/librte_pmd_bond/rte_eth_bond_8023ad_private.h |8 2 files changed, 16 insertions(+) diff --git a/lib/librte_pmd_bond/rte_eth_bond_8023ad.h b/lib/librte_pmd_bond/rte_eth_bond_8023ad.h index 9adc6aa..ebd0e93 100644 --- a/lib/librte_pmd_bond/rte_eth_bond_8023ad.h +++ b/lib/librte_pmd_bond/rte_eth_bond_8023ad.h @@ -36,6 +36,10 @@ #include +#ifdef __cplusplus +extern "C" { +#endif + /** * Actor/partner states */ @@ -211,4 +215,8 @@ int rte_eth_bond_8023ad_slave_info(uint8_t port_id, uint8_t slave_id, struct rte_eth_bond_8023ad_slave_info *conf); +#ifdef __cplusplus +} +#endif + #endif /* RTE_ETH_BOND_8023AD_H_ */ diff --git a/lib/librte_pmd_bond/rte_eth_bond_8023ad_private.h b/lib/librte_pmd_bond/rte_eth_bond_8023ad_private.h index 8adee70..7930345 100644 --- a/lib/librte_pmd_bond/rte_eth_bond_8023ad_private.h +++ b/lib/librte_pmd_bond/rte_eth_bond_8023ad_private.h @@ -42,6 +42,10 @@ #include "rte_eth_bond_8023ad.h" +#ifdef __cplusplus +extern "C" { +#endif + #define BOND_MODE_8023AX_UPDATE_TIMEOUT_MS 100 /** Maximum number of packets to one slave queued in TX ring. */ #define BOND_MODE_8023AX_SLAVE_RX_PKTS3 @@ -305,4 +309,8 @@ bond_mode_8023ad_deactivate_slave(struct rte_eth_dev *dev, uint8_t slave_pos); void bond_mode_8023ad_mac_address_update(struct rte_eth_dev *bond_dev); +#ifdef __cplusplus +} +#endif + #endif /* RTE_ETH_BOND_8023AD_H_ */ -- 1.7.9.5
[dpdk-dev] [PATCH 0/2] new headroom stats library and example application
Hi community, I would like to introduce a library for measuring the load of some arbitrary jobs. It can be used to profile every kind of job set on any arbitrary execution unit. In the provided l2fwd-headroom example I demonstrate how to use this library to profile packet forwarding (job set is forward, flush and stats) on LCores (execution unit). This example does not limit the possible schemes in which this library can be used. Pawel Wodkowski (2): librte_headroom: New library for checking core/system/app load examples: introduce new l2fwd-headroom example config/common_bsdapp |6 + config/common_linuxapp |6 + examples/Makefile |1 + examples/l2fwd-headroom/Makefile | 51 +++ examples/l2fwd-headroom/main.c | 875 lib/Makefile |1 + lib/librte_headroom/Makefile | 50 +++ lib/librte_headroom/rte_headroom.c | 368 +++ lib/librte_headroom/rte_headroom.h | 481 mk/rte.app.mk |4 + 10 files changed, 1843 insertions(+) create mode 100644 examples/l2fwd-headroom/Makefile create mode 100644 examples/l2fwd-headroom/main.c create mode 100644 lib/librte_headroom/Makefile create mode 100644 lib/librte_headroom/rte_headroom.c create mode 100644 lib/librte_headroom/rte_headroom.h -- 1.7.9.5
[dpdk-dev] [PATCH 1/2] librte_headroom: New library for checking core/system/app load
To calculate headroom we need to have some parts of code that do something. Those parts of code are called jobs (not tasks, to avoid confusion). Jobs are managed by the headroom library, which is responsible for executing them when needed. The rte_headroom_next_job() function waits for the first job to become ready. When a job is ready, the time that it spent waiting is added to the overall idle time and is also saved. The job is then executed. An executed job must return an integer value. This value is used to calculate the next execution time (the time when the job will be considered ready). For example: if the job is a forward job, it returns the number of received packets. The returned value is then compared to the target value. If the returned value is different, next_exec_time is adjusted. The previously saved idle time is considered to be the job's idle time (it is added to the job's idle time). After execution of the last ready job, the number of loops is incremented and the whole process starts all over again. Please notice that the given headroom is not absolute. For example: if some app has avg 100us headroom, adding a job that consumes 90us will not mean that there is 10us left. You need to run headroom profiling again after adding this 90us-job. Additionally, the user can define their own handlers: - idle handler - function called when no job is ready to execute. - loop hook - function called when all ready jobs are executed. - job update period callback - if a function more sophisticated than the default one is required to calculate the job's execution period. 
Signed-off-by: Pawel Wodkowski --- config/common_bsdapp |6 + config/common_linuxapp |6 + lib/Makefile |1 + lib/librte_headroom/Makefile | 50 lib/librte_headroom/rte_headroom.c | 368 +++ lib/librte_headroom/rte_headroom.h | 481 mk/rte.app.mk |4 + 7 files changed, 916 insertions(+) create mode 100644 lib/librte_headroom/Makefile create mode 100644 lib/librte_headroom/rte_headroom.c create mode 100644 lib/librte_headroom/rte_headroom.h diff --git a/config/common_bsdapp b/config/common_bsdapp index 9177db1..eca9299 100644 --- a/config/common_bsdapp +++ b/config/common_bsdapp @@ -282,6 +282,12 @@ CONFIG_RTE_LIBRTE_HASH=y CONFIG_RTE_LIBRTE_HASH_DEBUG=n # +# Compile librte_headroom +# +CONFIG_RTE_LIBRTE_HEADROOM=y +CONFIG_RTE_HEADROOM_MAX_JOBS=32 + +# # Compile librte_lpm # CONFIG_RTE_LIBRTE_LPM=y diff --git a/config/common_linuxapp b/config/common_linuxapp index 2f9643b..54c9458 100644 --- a/config/common_linuxapp +++ b/config/common_linuxapp @@ -290,6 +290,12 @@ CONFIG_RTE_LIBRTE_HASH=y CONFIG_RTE_LIBRTE_HASH_DEBUG=n # +# Compile librte_headroom +# +CONFIG_RTE_LIBRTE_HEADROOM=y +CONFIG_RTE_HEADROOM_MAX_JOBS=32 + +# # Compile librte_lpm # CONFIG_RTE_LIBRTE_LPM=y diff --git a/lib/Makefile b/lib/Makefile index 0ffc982..ab9e474 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -53,6 +53,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += librte_pmd_vmxnet3 DIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += librte_pmd_xenvirt DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += librte_vhost DIRS-$(CONFIG_RTE_LIBRTE_HASH) += librte_hash +DIRS-$(CONFIG_RTE_LIBRTE_HEADROOM) += librte_headroom DIRS-$(CONFIG_RTE_LIBRTE_LPM) += librte_lpm DIRS-$(CONFIG_RTE_LIBRTE_ACL) += librte_acl DIRS-$(CONFIG_RTE_LIBRTE_NET) += librte_net diff --git a/lib/librte_headroom/Makefile b/lib/librte_headroom/Makefile new file mode 100644 index 000..f0137e3 --- /dev/null +++ b/lib/librte_headroom/Makefile @@ -0,0 +1,50 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA
[dpdk-dev] [PATCH 2/2] examples: introduce new l2fwd-headroom example
This app demonstrates usage of the new headroom library. It is basically the original l2fwd with the following modifications to meet headroom library requirements: - main_loop() was split into two jobs: forward job and flush job. Logic for those jobs is almost the same as in the original application. - stats is moved to its own job. - If there are more lcores available than queues/ports, the stats job is run on the first free core, otherwise it is run on the master core. - stats are expanded to show headroom statistics. Comparing the original l2fwd and l2fwd-headroom apps will show what is needed to properly write your own application with headroom measurements. Please notice that assigning a separate core for printing stats is preferred because flushing stdout is terribly slow and might impact headroom statistics. Signed-off-by: Pawel Wodkowski --- examples/Makefile|1 + examples/l2fwd-headroom/Makefile | 51 +++ examples/l2fwd-headroom/main.c | 875 ++ 3 files changed, 927 insertions(+) create mode 100644 examples/l2fwd-headroom/Makefile create mode 100644 examples/l2fwd-headroom/main.c diff --git a/examples/Makefile b/examples/Makefile index 81f1d2f..8a459b7 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -50,6 +50,7 @@ DIRS-$(CONFIG_RTE_MBUF_REFCNT) += ip_fragmentation DIRS-$(CONFIG_RTE_MBUF_REFCNT) += ipv4_multicast DIRS-$(CONFIG_RTE_LIBRTE_KNI) += kni DIRS-y += l2fwd +DIRS-y += l2fwd-headroom DIRS-$(CONFIG_RTE_LIBRTE_IVSHMEM) += l2fwd-ivshmem DIRS-y += l3fwd DIRS-$(CONFIG_RTE_LIBRTE_ACL) += l3fwd-acl diff --git a/examples/l2fwd-headroom/Makefile b/examples/l2fwd-headroom/Makefile new file mode 100644 index 000..07da286 --- /dev/null +++ b/examples/l2fwd-headroom/Makefile @@ -0,0 +1,51 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overriden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = l2fwd-headroom + +# all source are stored in SRCS-y +SRCS-y := main.c + + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/l2fwd-headroom/main.c b/examples/l2fwd-headroom/main.c new file mode 100644 index 000..4a6c392 --- /dev/null +++ b/examples/l2fwd-headroom/main.c @@ -0,0 +1,875 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THI
[dpdk-dev] [PATCH v2] bond: fix 'extern "C"' decls in mode4 header files
v2: Remove 'extern "C"' declarations from bond private header files. v1: Add missing declarations to rte_bond_8023ad.h. Signed-off-by: Pawel Wodkowski --- lib/librte_pmd_bond/rte_eth_bond_8023ad.h | 8 lib/librte_pmd_bond/rte_eth_bond_private.h | 8 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/lib/librte_pmd_bond/rte_eth_bond_8023ad.h b/lib/librte_pmd_bond/rte_eth_bond_8023ad.h index 9adc6aa..ebd0e93 100644 --- a/lib/librte_pmd_bond/rte_eth_bond_8023ad.h +++ b/lib/librte_pmd_bond/rte_eth_bond_8023ad.h @@ -36,6 +36,10 @@ #include +#ifdef __cplusplus +extern "C" { +#endif + /** * Actor/partner states */ @@ -211,4 +215,8 @@ int rte_eth_bond_8023ad_slave_info(uint8_t port_id, uint8_t slave_id, struct rte_eth_bond_8023ad_slave_info *conf); +#ifdef __cplusplus +} +#endif + #endif /* RTE_ETH_BOND_8023AD_H_ */ diff --git a/lib/librte_pmd_bond/rte_eth_bond_private.h b/lib/librte_pmd_bond/rte_eth_bond_private.h index e01e66b..3da5a9e 100644 --- a/lib/librte_pmd_bond/rte_eth_bond_private.h +++ b/lib/librte_pmd_bond/rte_eth_bond_private.h @@ -34,10 +34,6 @@ #ifndef _RTE_ETH_BOND_PRIVATE_H_ #define _RTE_ETH_BOND_PRIVATE_H_ -#ifdef __cplusplus -extern "C" { -#endif - #include #include @@ -276,8 +272,4 @@ int bond_ethdev_parse_time_ms_kvarg(const char *key __rte_unused, const char *value, void *extra_args); -#ifdef __cplusplus -} -#endif - #endif -- 1.9.1
[dpdk-dev] Error while executing make - Pktgen-DPDK
On 2015-02-09 20:25, Shankari Vaidyalingam wrote: > Hi, > > I'm trying to compile the DPDK code coming along with the Pktgen-DPDK. > I got the source code of Pktgen-DPDK by cloning the Git repository using - > sudo git clone git://github.com/Pktgen/Pktgen-DPDK.git On Ubuntu 12.04 you need to apply this patch http://dpdk.org/ml/archives/dev/2014-August/004771.html if you are using github repo. I don't think this repo is maintained anymore. You can use this repo http://dpdk.org/browse/apps/pktgen-dpdk/. Pawel > > I'm getting the below error when I tried executing make: > > from > /home/controller/software/Pktgen-DPDK/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h:31, > from > /home/controller/software/Pktgen-DPDK/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h:31, > from > /home/controller/software/Pktgen-DPDK/dpdk/x86_64-native-linuxapp-gcc/build/lib/librte_eal/linuxapp/kni/e1000_82575.c:38: > /home/controller/software/Pktgen-DPDK/dpdk/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h:3864:63: > error: missing binary operator before token "~" > make[10]: *** > [/home/controller/software/Pktgen-DPDK/dpdk/x86_64-native-linuxapp-gcc/build/lib/librte_eal/linuxapp/kni/e1000_82575.o] > Error 1 > make[9]: *** > [_module_/home/controller/software/Pktgen-DPDK/dpdk/x86_64-native-linuxapp-gcc/build/lib/librte_eal/linuxapp/kni] > Error 2 > make[8]: *** [sub-make] Error 2 > make[7]: *** [rte_kni.ko] Error 2 > make[6]: *** [kni] Error 2 > make[5]: *** [linuxapp] Error 2 > make[4]: *** [librte_eal] Error 2 > make[3]: *** [lib] Error 2 > make[2]: *** [all] Error 2 > make[1]: *** [x86_64-native-linuxapp-gcc_install] Error 2 > make: *** [install] Error 2 > > Please let me know how to resolve this issue. > > Regards > Shankari.V > -- Pawel
[dpdk-dev] [PATCH v2 0/2] new headroom stats library and example application
Hi community, I would like to introduce a library for measuring the load of some arbitrary jobs. It can be used to profile every kind of job set on any arbitrary execution unit or tasking library. In the provided l2fwd-headroom example I demonstrate how to use this library to select the optimal rx burst poll time. Jobs are selected by using existing rte_timer library calls. This example does not limit the possible schemes in which this library can be used. PATCH v2 changes: - Remove jobs management/callback from library to not duplicate tasking library behaviour. - Cleanup/remove useless statistics. - Rework example application to use rte_timer library for jobs selection. - Introduce new app parameter '-l' for automatic thousands separating in stats. - More readable statistics format. Pawel Wodkowski (2): librte_headroom: New library for checking core/system/app load examples: introduce new l2fwd-headroom example config/common_bsdapp |5 + config/common_linuxapp |5 + examples/Makefile|1 + examples/l2fwd-headroom/Makefile | 51 ++ examples/l2fwd-headroom/main.c | 1039 ++ lib/Makefile |1 + lib/librte_headroom/Makefile | 54 ++ lib/librte_headroom/rte_headroom.c | 271 +++ lib/librte_headroom/rte_headroom.h | 324 lib/librte_headroom/rte_headroom_version.map | 20 + mk/rte.app.mk|4 + 11 files changed, 1775 insertions(+) create mode 100644 examples/l2fwd-headroom/Makefile create mode 100644 examples/l2fwd-headroom/main.c create mode 100644 lib/librte_headroom/Makefile create mode 100644 lib/librte_headroom/rte_headroom.c create mode 100644 lib/librte_headroom/rte_headroom.h create mode 100644 lib/librte_headroom/rte_headroom_version.map -- 1.7.9.5
[dpdk-dev] [PATCH v2 1/2] librte_headroom: New library for checking core/system/app load
This library provides an API to measure time spent in particular parts of code and to calculate optimal polling time. To calculate those statistics, application code needs to be divided into parts (called jobs) that do something. It is up to the application to decide what is considered a job. A series of jobs must be surrounded with the rte_headroom_start_loop() and rte_headroom_finish_loop() calls. After that, jobs might be started. Each job must be surrounded with rte_headroom_start_job() and rte_headroom_finish_job() calls. After a job finishes its execution, the period in which it should be called again is adjusted to minimize time wasted on unnecessary polls/calls. The adjustment is based on data provided by the job itself (ex: number of packets it processed). After all jobs in the series are executed, the following statistics are updated and might be used by the application. Statistics can be reset. Some of the provided statistic data: - total/min/max execution - time spent in executing jobs. - total/min/max management - time spent outside execution area. This value might be used to measure the overhead of scheduling jobs. This time also contains the overhead of the headroom library itself. - number of loops that executed at least one job - executed jobs - time when statistics were reset. Each job provides total/min/max execution time and execution count statistics. 
Signed-off-by: Pawel Wodkowski --- config/common_bsdapp |5 + config/common_linuxapp |5 + lib/Makefile |1 + lib/librte_headroom/Makefile | 54 + lib/librte_headroom/rte_headroom.c | 271 + lib/librte_headroom/rte_headroom.h | 324 ++ lib/librte_headroom/rte_headroom_version.map | 20 ++ 7 files changed, 680 insertions(+) create mode 100644 lib/librte_headroom/Makefile create mode 100644 lib/librte_headroom/rte_headroom.c create mode 100644 lib/librte_headroom/rte_headroom.h create mode 100644 lib/librte_headroom/rte_headroom_version.map diff --git a/config/common_bsdapp b/config/common_bsdapp index 57bacb8..aa2e5fd 100644 --- a/config/common_bsdapp +++ b/config/common_bsdapp @@ -282,6 +282,11 @@ CONFIG_RTE_LIBRTE_HASH=y CONFIG_RTE_LIBRTE_HASH_DEBUG=n # +# Compile librte_headroom +# +CONFIG_RTE_LIBRTE_HEADROOM=y + +# # Compile librte_lpm # CONFIG_RTE_LIBRTE_LPM=y diff --git a/config/common_linuxapp b/config/common_linuxapp index d428f84..055a37b 100644 --- a/config/common_linuxapp +++ b/config/common_linuxapp @@ -290,6 +290,11 @@ CONFIG_RTE_LIBRTE_HASH=y CONFIG_RTE_LIBRTE_HASH_DEBUG=n # +# Compile librte_headroom +# +CONFIG_RTE_LIBRTE_HEADROOM=y + +# # Compile librte_lpm # CONFIG_RTE_LIBRTE_LPM=y diff --git a/lib/Makefile b/lib/Makefile index d617d81..4fc2819 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -54,6 +54,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += librte_pmd_vmxnet3 DIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += librte_pmd_xenvirt DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += librte_vhost DIRS-$(CONFIG_RTE_LIBRTE_HASH) += librte_hash +DIRS-$(CONFIG_RTE_LIBRTE_HEADROOM) += librte_headroom DIRS-$(CONFIG_RTE_LIBRTE_LPM) += librte_lpm DIRS-$(CONFIG_RTE_LIBRTE_ACL) += librte_acl DIRS-$(CONFIG_RTE_LIBRTE_NET) += librte_net diff --git a/lib/librte_headroom/Makefile b/lib/librte_headroom/Makefile new file mode 100644 index 000..faefb3b --- /dev/null +++ b/lib/librte_headroom/Makefile @@ -0,0 +1,54 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. 
All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRA
[dpdk-dev] [PATCH v2 2/2] examples: introduce new l2fwd-headroom example
This app demonstrates usage of the new headroom library. It is basically the original l2fwd with the following modifications to meet headroom library requirements: - main_loop() was split into two jobs: forward job and flush job. Logic for those jobs is almost the same as in the original application. - stats is moved to rte_alarm callback to not introduce overhead of printing. - stats are expanded to show headroom statistics. - added new parameter '-l' to enable automatic thousands separator. Comparing the original l2fwd and l2fwd-headroom apps will show what is needed to properly write your own application with headroom measurements. New available statistics: - Total and % of fwd and flush execution time - management time - overhead of rte_timer + overhead of headroom library - Idle time and % of time spent waiting for fwd or flush to be ready to execute. - per job execution time and period. Signed-off-by: Pawel Wodkowski --- examples/Makefile|1 + examples/l2fwd-headroom/Makefile | 51 ++ examples/l2fwd-headroom/main.c | 1039 ++ mk/rte.app.mk|4 + 4 files changed, 1095 insertions(+) create mode 100644 examples/l2fwd-headroom/Makefile create mode 100644 examples/l2fwd-headroom/main.c diff --git a/examples/Makefile b/examples/Makefile index 81f1d2f..8a459b7 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -50,6 +50,7 @@ DIRS-$(CONFIG_RTE_MBUF_REFCNT) += ip_fragmentation DIRS-$(CONFIG_RTE_MBUF_REFCNT) += ipv4_multicast DIRS-$(CONFIG_RTE_LIBRTE_KNI) += kni DIRS-y += l2fwd +DIRS-y += l2fwd-headroom DIRS-$(CONFIG_RTE_LIBRTE_IVSHMEM) += l2fwd-ivshmem DIRS-y += l3fwd DIRS-$(CONFIG_RTE_LIBRTE_ACL) += l3fwd-acl diff --git a/examples/l2fwd-headroom/Makefile b/examples/l2fwd-headroom/Makefile new file mode 100644 index 000..07da286 --- /dev/null +++ b/examples/l2fwd-headroom/Makefile @@ -0,0 +1,51 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overriden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = l2fwd-headroom + +# all source are stored in SRCS-y +SRCS-y := main.c + + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/l2fwd-headroom/main.c b/examples/l2fwd-headroom/main.c new file mode 100644 index 000..7ba1743 --- /dev/null +++ b/examples/l2fwd-headroom/main.c @@ -0,0 +1,1039 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may
[dpdk-dev] [PATCH v3 1/2] pmd: enable DCB in SRIOV
This patch enables DCB in SRIOV mode for ixgbe (Niantic) driver. Signed-off-by: Pawel Wodkowski --- lib/librte_pmd_ixgbe/ixgbe_ethdev.c | 2 +- lib/librte_pmd_ixgbe/ixgbe_pf.c | 19 ++- lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 7 +++ 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c index 412bab2..7e7434d 100644 --- a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c +++ b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c @@ -1514,7 +1514,7 @@ ixgbe_dev_configure(struct rte_eth_dev *dev) if (conf->nb_queue_pools != ETH_16_POOLS && conf->nb_queue_pools != ETH_32_POOLS) { PMD_INIT_LOG(ERR, " VMDQ+DCB selected, " - "number of TX qqueue pools must be %d or %d\n", + "number of TX queue pools must be %d or %d\n", ETH_16_POOLS, ETH_32_POOLS); return (-EINVAL); } diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c b/lib/librte_pmd_ixgbe/ixgbe_pf.c index 255c996..8411445 100644 --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c @@ -137,7 +137,7 @@ int ixgbe_pf_host_init(struct rte_eth_dev *eth_dev) /* - * Functin that make SRIOV configuration, based on device configuration, + * Function that make SRIOV configuration, based on device configuration, * number of requested queues and number of VF created. 
* Function returns: * 1 - SRIOV is not enabled (no VF created) @@ -191,7 +191,7 @@ ixgbe_pf_configure_mq_sriov(struct rte_eth_dev *dev) break; case ETH_MQ_RX_RSS: PMD_INIT_LOG(INFO, " RSS (SRIOV active) mode, " - "Rx mq mode is changed from:" + "Rx mq mode is changed from " "mq_mode %u into VMDQ mq_mode %u\n", dev_conf->rxmode.mq_mode, dev->data->dev_conf.rxmode.mq_mode); @@ -295,7 +295,7 @@ ixgbe_pf_configure_mq_sriov(struct rte_eth_dev *dev) /* Check if available queus count is not less than allocated.*/ if (dev->data->nb_rx_queues > sriov->nb_rx_q_per_pool || - dev->data->nb_rx_queues > sriov->nb_tx_q_per_pool) { + dev->data->nb_tx_queues > sriov->nb_tx_q_per_pool) { PMD_INIT_LOG(ERR, "SRIOV active, " "rx/tx queue number must less or equal to %d/%d\n", sriov->nb_rx_q_per_pool, sriov->nb_tx_q_per_pool); @@ -305,7 +305,6 @@ ixgbe_pf_configure_mq_sriov(struct rte_eth_dev *dev) return 0; } - int ixgbe_pf_host_configure(struct rte_eth_dev *eth_dev) { uint32_t vtctl, fcrth; @@ -659,7 +658,9 @@ ixgbe_get_vf_queues(struct rte_eth_dev *dev, uint32_t vf, uint32_t *msgbuf) { struct ixgbe_vf_info *vfinfo = *IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private); - uint32_t default_q = vf * RTE_ETH_DEV_SRIOV(dev).nb_tx_q_per_pool; + struct ixgbe_dcb_config *dcbinfo = + IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private); + uint32_t default_q = RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx; /* Verify if the PF supports the mbox APIs version or not */ switch (vfinfo[vf].api_version) { @@ -677,10 +678,10 @@ ixgbe_get_vf_queues(struct rte_eth_dev *dev, uint32_t vf, uint32_t *msgbuf) /* Notify VF of default queue */ msgbuf[IXGBE_VF_DEF_QUEUE] = default_q; - /* -* FIX ME if it needs fill msgbuf[IXGBE_VF_TRANS_VLAN] -* for VLAN strip or VMDQ_DCB or VMDQ_DCB_RSS -*/ + if (dcbinfo->num_tcs.pg_tcs) + msgbuf[IXGBE_VF_TRANS_VLAN] = dcbinfo->num_tcs.pg_tcs; + else + msgbuf[IXGBE_VF_TRANS_VLAN] = 1; return 0; } diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c 
index e6766b3..f845bb0 100644 --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c @@ -3166,10 +3166,9 @@ void ixgbe_configure_dcb(struct rte_eth_dev *dev) /* check support mq_mode for DCB */ if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) && - (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB)) - return; - - if (dev->data->nb_rx_queues != ETH_DCB_NUM_QUEUES) + (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) && + (dev_conf->txmode.mq_mode != ETH_MQ_TX_VMDQ_DCB) && + (dev_conf->txmode.mq_mode != ETH_MQ_TX_DCB)) return; /** Configure DCB hardware **/ -- 1.9.1
[dpdk-dev] [PATCH v3 0/2] new headroom stats library and example application
Hi community, I would like to introduce a library for measuring the load of some arbitrary jobs. It can be used to profile any kind of job set on any arbitrary execution unit or tasking library. In the provided l2fwd-headroom example I demonstrate how to use this library to select the optimal rx burst poll time. Jobs are selected by using existing rte_timer library calls. This example does not limit the possible schemes in which this library can be used. PATCH v3 changes: - spelling fixes. PATCH v2 changes: - Remove jobs management/callback from library to not duplicate tasking library behaviour. - Cleanup/remove useless statistics. - Rework example application to use rte_timer library for jobs selection. - Introduce new app parameter '-l' for automatic thousands separators in stats. - More readable statistics format. Pawel Wodkowski (2): pmd: enable DCB in SRIOV tespmd: fix DCB in SRIOV mode support app/test-pmd/cmdline.c | 4 ++-- app/test-pmd/testpmd.c | 39 +++-- app/test-pmd/testpmd.h | 10 -- lib/librte_pmd_ixgbe/ixgbe_ethdev.c | 2 +- lib/librte_pmd_ixgbe/ixgbe_pf.c | 19 +- lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 7 +++ 6 files changed, 45 insertions(+), 36 deletions(-) -- 1.9.1
[dpdk-dev] [PATCH v3 2/2] tespmd: fix DCB in SRIOV mode support
This patch incorporate fixes to support DCB in SRIOV mode for testpmd. Signed-off-by: Pawel Wodkowski --- app/test-pmd/cmdline.c | 4 ++-- app/test-pmd/testpmd.c | 39 +-- app/test-pmd/testpmd.h | 10 -- 3 files changed, 31 insertions(+), 22 deletions(-) diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c index 4beb404..eb9877e 100644 --- a/app/test-pmd/cmdline.c +++ b/app/test-pmd/cmdline.c @@ -1942,9 +1942,9 @@ cmd_config_dcb_parsed(void *parsed_result, /* DCB in VT mode */ if (!strncmp(res->vt_en, "on",2)) - dcb_conf.dcb_mode = DCB_VT_ENABLED; + dcb_conf.vt_en = 1; else - dcb_conf.dcb_mode = DCB_ENABLED; + dcb_conf.vt_en = 0; if (!strncmp(res->pfc_en, "on",2)) { dcb_conf.pfc_en = 1; diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index 773b8af..9b12c25 100644 --- a/app/test-pmd/testpmd.c +++ b/app/test-pmd/testpmd.c @@ -1743,7 +1743,8 @@ const uint16_t vlan_tags[] = { }; static int -get_eth_dcb_conf(struct rte_eth_conf *eth_conf, struct dcb_config *dcb_conf) +get_eth_dcb_conf(struct rte_eth_conf *eth_conf, struct dcb_config *dcb_conf, + uint16_t sriov) { uint8_t i; @@ -1751,7 +1752,7 @@ get_eth_dcb_conf(struct rte_eth_conf *eth_conf, struct dcb_config *dcb_conf) * Builds up the correct configuration for dcb+vt based on the vlan tags array * given above, and the number of traffic classes available for use. 
*/ - if (dcb_conf->dcb_mode == DCB_VT_ENABLED) { + if (dcb_conf->vt_en == 1) { struct rte_eth_vmdq_dcb_conf vmdq_rx_conf; struct rte_eth_vmdq_dcb_tx_conf vmdq_tx_conf; @@ -1768,9 +1769,17 @@ get_eth_dcb_conf(struct rte_eth_conf *eth_conf, struct dcb_config *dcb_conf) vmdq_rx_conf.pool_map[i].vlan_id = vlan_tags[ i ]; vmdq_rx_conf.pool_map[i].pools = 1 << (i % vmdq_rx_conf.nb_queue_pools); } - for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) { - vmdq_rx_conf.dcb_queue[i] = i; - vmdq_tx_conf.dcb_queue[i] = i; + + if (sriov == 0) { + for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) { + vmdq_rx_conf.dcb_queue[i] = i; + vmdq_tx_conf.dcb_queue[i] = i; + } + } else { + for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) { + vmdq_rx_conf.dcb_queue[i] = i % dcb_conf->num_tcs; + vmdq_tx_conf.dcb_queue[i] = i % dcb_conf->num_tcs; + } } /*set DCB mode of RX and TX of multiple queues*/ @@ -1828,22 +1837,32 @@ init_port_dcb_config(portid_t pid,struct dcb_config *dcb_conf) uint16_t nb_vlan; uint16_t i; - /* rxq and txq configuration in dcb mode */ - nb_rxq = 128; - nb_txq = 128; rx_free_thresh = 64; + rte_port = &ports[pid]; memset(&port_conf,0,sizeof(struct rte_eth_conf)); /* Enter DCB configuration status */ dcb_config = 1; nb_vlan = sizeof( vlan_tags )/sizeof( vlan_tags[ 0 ]); /*set configuration of DCB in vt mode and DCB in non-vt mode*/ - retval = get_eth_dcb_conf(&port_conf, dcb_conf); + retval = get_eth_dcb_conf(&port_conf, dcb_conf, rte_port->dev_info.max_vfs); + + /* rxq and txq configuration in dcb mode */ + nb_rxq = rte_port->dev_info.max_rx_queues; + nb_txq = rte_port->dev_info.max_tx_queues; + + if (rte_port->dev_info.max_vfs) { + if (port_conf.rxmode.mq_mode == ETH_MQ_RX_VMDQ_DCB) + nb_rxq /= port_conf.rx_adv_conf.vmdq_dcb_conf.nb_queue_pools; + + if (port_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_DCB) + nb_txq /= port_conf.tx_adv_conf.vmdq_dcb_tx_conf.nb_queue_pools; + } + if (retval < 0) return retval; - rte_port = &ports[pid]; memcpy(&rte_port->dev_conf, 
&port_conf,sizeof(struct rte_eth_conf)); rte_port->rx_conf.rx_thresh = rx_thresh; diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h index 8f5e6c7..695e893 100644 --- a/app/test-pmd/testpmd.h +++ b/app/test-pmd/testpmd.h @@ -227,20 +227,10 @@ struct fwd_config { portid_t nb_fwd_ports;/**< Nb. of ports involved. */ }; -/** - * DCB mode enable - */ -enum dcb_mode_enable -{ - DCB_VT_ENABLED, - DCB_ENABLED -}; - /* * DCB general config info */ struct dcb_config { - enum dcb_mode_enable dcb_mode; uint8_t vt_en; enum rte_eth_nb_tcs num_tcs; uint8_t pfc_en; -- 1.9.1
[dpdk-dev] [PATCH v4 0/2] new headroom stats library and example application
Hi community, I would like to introduce a library for measuring the load of some arbitrary jobs. It can be used to profile any kind of job set on any arbitrary execution unit or tasking library. In the provided l2fwd-headroom example I demonstrate how to use this library to select the optimal rx burst poll time. Jobs are selected by using existing rte_timer library calls. This example does not limit the possible schemes in which this library can be used. PATCH v4 changes: - use proper branch for generating patch. PATCH v3 changes: - Fix spelling. PATCH v2 changes: - Remove jobs management/callback from library to not duplicate tasking library behaviour. - Cleanup/remove useless statistics. - Rework example application to use rte_timer library for jobs selection. - Introduce new app parameter '-l' for automatic thousands separators in stats. - More readable statistics format. Pawel Wodkowski (2): librte_headroom: New library for checking core/system/app load examples: introduce new l2fwd-headroom example config/common_bsdapp |5 + config/common_linuxapp |5 + examples/Makefile|1 + examples/l2fwd-headroom/Makefile | 51 ++ examples/l2fwd-headroom/main.c | 1039 ++ lib/Makefile |1 + lib/librte_headroom/Makefile | 54 ++ lib/librte_headroom/rte_headroom.c | 271 +++ lib/librte_headroom/rte_headroom.h | 324 lib/librte_headroom/rte_headroom_version.map | 20 + mk/rte.app.mk|4 + 11 files changed, 1775 insertions(+) create mode 100644 examples/l2fwd-headroom/Makefile create mode 100644 examples/l2fwd-headroom/main.c create mode 100644 lib/librte_headroom/Makefile create mode 100644 lib/librte_headroom/rte_headroom.c create mode 100644 lib/librte_headroom/rte_headroom.h create mode 100644 lib/librte_headroom/rte_headroom_version.map -- 1.9.1
[dpdk-dev] [PATCH v4 1/2] librte_headroom: New library for checking core/system/app load
This library provides an API to measure time spent in particular parts of code and to calculate optimal polling time. To calculate those statistics, application code needs to be divided into parts (called jobs) that do something. It is up to the application to decide what is considered a job. A series of jobs must be surrounded with the rte_headroom_start_loop() and rte_headroom_finish_loop() calls. After that, jobs might be started. Each job must be surrounded with rte_headroom_start_job() and rte_headroom_finish_job() calls. After a job finishes its execution, the period in which it should be called again is adjusted to minimize time wasted on unnecessary polls/calls. The adjustment is based on data provided by the job itself (ex: number of packets it processed). After all jobs in the series are executed, the following statistics are updated and might be used by the application. Statistics can be reset. Some of the provided statistic data: - total/min/max execution - time spent in executing jobs. - total/min/max management - time spent outside execution area. This value might be used to measure the overhead of scheduling jobs. This time also contains the overhead of the headroom library itself. - number of loops that executed at least one job - executed jobs - time when statistics were reset. Each job provides total/min/max execution time and execution count statistics. 
Signed-off-by: Pawel Wodkowski --- config/common_bsdapp | 5 + config/common_linuxapp | 5 + lib/Makefile | 1 + lib/librte_headroom/Makefile | 54 + lib/librte_headroom/rte_headroom.c | 271 ++ lib/librte_headroom/rte_headroom.h | 324 +++ lib/librte_headroom/rte_headroom_version.map | 20 ++ 7 files changed, 680 insertions(+) create mode 100644 lib/librte_headroom/Makefile create mode 100644 lib/librte_headroom/rte_headroom.c create mode 100644 lib/librte_headroom/rte_headroom.h create mode 100644 lib/librte_headroom/rte_headroom_version.map diff --git a/config/common_bsdapp b/config/common_bsdapp index 57bacb8..aa2e5fd 100644 --- a/config/common_bsdapp +++ b/config/common_bsdapp @@ -282,6 +282,11 @@ CONFIG_RTE_LIBRTE_HASH=y CONFIG_RTE_LIBRTE_HASH_DEBUG=n # +# Compile librte_headroom +# +CONFIG_RTE_LIBRTE_HEADROOM=y + +# # Compile librte_lpm # CONFIG_RTE_LIBRTE_LPM=y diff --git a/config/common_linuxapp b/config/common_linuxapp index d428f84..055a37b 100644 --- a/config/common_linuxapp +++ b/config/common_linuxapp @@ -290,6 +290,11 @@ CONFIG_RTE_LIBRTE_HASH=y CONFIG_RTE_LIBRTE_HASH_DEBUG=n # +# Compile librte_headroom +# +CONFIG_RTE_LIBRTE_HEADROOM=y + +# # Compile librte_lpm # CONFIG_RTE_LIBRTE_LPM=y diff --git a/lib/Makefile b/lib/Makefile index d617d81..4fc2819 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -54,6 +54,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += librte_pmd_vmxnet3 DIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += librte_pmd_xenvirt DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += librte_vhost DIRS-$(CONFIG_RTE_LIBRTE_HASH) += librte_hash +DIRS-$(CONFIG_RTE_LIBRTE_HEADROOM) += librte_headroom DIRS-$(CONFIG_RTE_LIBRTE_LPM) += librte_lpm DIRS-$(CONFIG_RTE_LIBRTE_ACL) += librte_acl DIRS-$(CONFIG_RTE_LIBRTE_NET) += librte_net diff --git a/lib/librte_headroom/Makefile b/lib/librte_headroom/Makefile new file mode 100644 index 000..faefb3b --- /dev/null +++ b/lib/librte_headroom/Makefile @@ -0,0 +1,54 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. 
All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRA
[dpdk-dev] [PATCH v4 2/2] examples: introduce new l2fwd-headroom example
This app demonstrates usage of the new headroom library. It is basically the original l2fwd with the following modifications to meet headroom library requirements: - main_loop() was split into two jobs: forward job and flush job. Logic for those jobs is almost the same as in the original application. - stats is moved to rte_alarm callback to not introduce overhead of printing. - stats are expanded to show headroom statistics. - added new parameter '-l' to enable automatic thousands separator. Comparing the original l2fwd and l2fwd-headroom apps will show what is needed to properly write your own application with headroom measurements. New available statistics: - Total and % of fwd and flush execution time - management time - overhead of rte_timer + overhead of headroom library - Idle time and % of time spent waiting for fwd or flush to be ready to execute. - per job execution time and period. Signed-off-by: Pawel Wodkowski --- examples/Makefile|1 + examples/l2fwd-headroom/Makefile | 51 ++ examples/l2fwd-headroom/main.c | 1039 ++ mk/rte.app.mk|4 + 4 files changed, 1095 insertions(+) create mode 100644 examples/l2fwd-headroom/Makefile create mode 100644 examples/l2fwd-headroom/main.c diff --git a/examples/Makefile b/examples/Makefile index 81f1d2f..8a459b7 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -50,6 +50,7 @@ DIRS-$(CONFIG_RTE_MBUF_REFCNT) += ip_fragmentation DIRS-$(CONFIG_RTE_MBUF_REFCNT) += ipv4_multicast DIRS-$(CONFIG_RTE_LIBRTE_KNI) += kni DIRS-y += l2fwd +DIRS-y += l2fwd-headroom DIRS-$(CONFIG_RTE_LIBRTE_IVSHMEM) += l2fwd-ivshmem DIRS-y += l3fwd DIRS-$(CONFIG_RTE_LIBRTE_ACL) += l3fwd-acl diff --git a/examples/l2fwd-headroom/Makefile b/examples/l2fwd-headroom/Makefile new file mode 100644 index 000..07da286 --- /dev/null +++ b/examples/l2fwd-headroom/Makefile @@ -0,0 +1,51 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overriden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = l2fwd-headroom + +# all source are stored in SRCS-y +SRCS-y := main.c + + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/l2fwd-headroom/main.c b/examples/l2fwd-headroom/main.c new file mode 100644 index 000..7ba1743 --- /dev/null +++ b/examples/l2fwd-headroom/main.c @@ -0,0 +1,1039 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may
[dpdk-dev] [PATCH v5 0/3] new headroom stats library and example application
Hi community, I would like to introduce a library for measuring the load of some arbitrary jobs. It can be used to profile any kind of job set on any arbitrary execution unit or tasking library. In the provided l2fwd-headroom example I demonstrate how to use this library to select the optimal rx burst poll time. Jobs are selected by using existing rte_timer library calls. This example does not limit the possible schemes in which this library can be used. PATCH v5 changes: - Fix spelling and checkpatch.pl errors. - Add maintainer claim for library and example app. PATCH v4 changes: - use proper branch for generating patch. PATCH v3 changes: - Fix spelling. PATCH v2 changes: - Remove jobs management/callback from library to not duplicate tasking library behaviour. - Cleanup/remove useless statistics. - Rework example application to use rte_timer library for jobs selection. - Introduce new app parameter '-l' for automatic thousands separators in stats. - More readable statistics format. Pawel Wodkowski (3): librte_headroom: New library for checking core/system/app load examples: introduce new l2fwd-headroom example MAINTAINERS: claim responsibility for headroom library and example app MAINTAINERS |4 + config/common_bsdapp |5 + config/common_linuxapp |5 + examples/Makefile|1 + examples/l2fwd-headroom/Makefile | 51 ++ examples/l2fwd-headroom/main.c | 1040 ++ lib/Makefile |1 + lib/librte_headroom/Makefile | 54 ++ lib/librte_headroom/rte_headroom.c | 271 +++ lib/librte_headroom/rte_headroom.h | 324 lib/librte_headroom/rte_headroom_version.map | 19 + mk/rte.app.mk|4 + 12 files changed, 1779 insertions(+) create mode 100644 examples/l2fwd-headroom/Makefile create mode 100644 examples/l2fwd-headroom/main.c create mode 100644 lib/librte_headroom/Makefile create mode 100644 lib/librte_headroom/rte_headroom.c create mode 100644 lib/librte_headroom/rte_headroom.h create mode 100644 lib/librte_headroom/rte_headroom_version.map -- 1.9.1
[dpdk-dev] [PATCH v5 1/3] librte_headroom: New library for checking core/system/app load
This library provides an API to measure the time spent in particular parts of code and to calculate the optimal polling time. To calculate those statistics, application code needs to be divided into parts (called jobs) that do something. It is up to the application to decide what is considered a job. A series of jobs must be surrounded with the rte_headroom_start_loop() and rte_headroom_finish_loop() calls. After that, jobs might be started. Each job must be surrounded with rte_headroom_start_job() and rte_headroom_finish_job() calls. After a job finishes its execution, the period in which it should be called again is adjusted to minimize time wasted on unnecessary polls/calls. The adjustment is based on data provided by the job itself (e.g. the number of packets it processed). After all jobs in a series are executed, the following statistics are updated and might be used by the application. Statistics can be reset. Some of the provided statistic data: - total/min/max execution - time spent in executing jobs. - total/min/max management - time spent outside the execution area. This value might be used to measure the overhead of scheduling jobs. This time also contains the overhead of the headroom library itself. - number of loops that executed at least one job - executed jobs - time when statistics were reset. Each job provides total/min/max execution time and execution count statistics. 
Signed-off-by: Pawel Wodkowski --- config/common_bsdapp | 5 + config/common_linuxapp | 5 + lib/Makefile | 1 + lib/librte_headroom/Makefile | 54 + lib/librte_headroom/rte_headroom.c | 271 ++ lib/librte_headroom/rte_headroom.h | 324 +++ lib/librte_headroom/rte_headroom_version.map | 19 ++ 7 files changed, 679 insertions(+) create mode 100644 lib/librte_headroom/Makefile create mode 100644 lib/librte_headroom/rte_headroom.c create mode 100644 lib/librte_headroom/rte_headroom.h create mode 100644 lib/librte_headroom/rte_headroom_version.map diff --git a/config/common_bsdapp b/config/common_bsdapp index 57bacb8..aa2e5fd 100644 --- a/config/common_bsdapp +++ b/config/common_bsdapp @@ -282,6 +282,11 @@ CONFIG_RTE_LIBRTE_HASH=y CONFIG_RTE_LIBRTE_HASH_DEBUG=n # +# Compile librte_headroom +# +CONFIG_RTE_LIBRTE_HEADROOM=y + +# # Compile librte_lpm # CONFIG_RTE_LIBRTE_LPM=y diff --git a/config/common_linuxapp b/config/common_linuxapp index d428f84..055a37b 100644 --- a/config/common_linuxapp +++ b/config/common_linuxapp @@ -290,6 +290,11 @@ CONFIG_RTE_LIBRTE_HASH=y CONFIG_RTE_LIBRTE_HASH_DEBUG=n # +# Compile librte_headroom +# +CONFIG_RTE_LIBRTE_HEADROOM=y + +# # Compile librte_lpm # CONFIG_RTE_LIBRTE_LPM=y diff --git a/lib/Makefile b/lib/Makefile index d617d81..4fc2819 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -54,6 +54,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += librte_pmd_vmxnet3 DIRS-$(CONFIG_RTE_LIBRTE_PMD_XENVIRT) += librte_pmd_xenvirt DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += librte_vhost DIRS-$(CONFIG_RTE_LIBRTE_HASH) += librte_hash +DIRS-$(CONFIG_RTE_LIBRTE_HEADROOM) += librte_headroom DIRS-$(CONFIG_RTE_LIBRTE_LPM) += librte_lpm DIRS-$(CONFIG_RTE_LIBRTE_ACL) += librte_acl DIRS-$(CONFIG_RTE_LIBRTE_NET) += librte_net diff --git a/lib/librte_headroom/Makefile b/lib/librte_headroom/Makefile new file mode 100644 index 000..faefb3b --- /dev/null +++ b/lib/librte_headroom/Makefile @@ -0,0 +1,54 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. 
All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRA
[dpdk-dev] [PATCH v5 2/3] examples: introduce new l2fwd-headroom example
This app demonstrate usage of new headroom library. It is basically the orginal l2fwd with following modifications to met headroom library requirements: - main_loop() was split into two jobs: forward job and flush job. Logic for those jobs is almost the same as in original application. - stats is moved to rte_alarm callback to not introduce overhead of printing. - stats are expanded to show headroom statistics. - added new parameter '-l' to automatic thousands separator. Comparing original l2fwd and l2fwd-headroom apps will show approach what is needed to properly write own application with headroom measurements. New available statistics: - Total and % of fwd and flush execution time - management time - overhead of rte_timer + overhead of headroom library - Idle time and % of time spent waiting for fwd or flush to be ready to execute. - per job execution time and period. Signed-off-by: Pawel Wodkowski --- examples/Makefile|1 + examples/l2fwd-headroom/Makefile | 51 ++ examples/l2fwd-headroom/main.c | 1040 ++ mk/rte.app.mk|4 + 4 files changed, 1096 insertions(+) create mode 100644 examples/l2fwd-headroom/Makefile create mode 100644 examples/l2fwd-headroom/main.c diff --git a/examples/Makefile b/examples/Makefile index 81f1d2f..8a459b7 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -50,6 +50,7 @@ DIRS-$(CONFIG_RTE_MBUF_REFCNT) += ip_fragmentation DIRS-$(CONFIG_RTE_MBUF_REFCNT) += ipv4_multicast DIRS-$(CONFIG_RTE_LIBRTE_KNI) += kni DIRS-y += l2fwd +DIRS-y += l2fwd-headroom DIRS-$(CONFIG_RTE_LIBRTE_IVSHMEM) += l2fwd-ivshmem DIRS-y += l3fwd DIRS-$(CONFIG_RTE_LIBRTE_ACL) += l3fwd-acl diff --git a/examples/l2fwd-headroom/Makefile b/examples/l2fwd-headroom/Makefile new file mode 100644 index 000..07da286 --- /dev/null +++ b/examples/l2fwd-headroom/Makefile @@ -0,0 +1,51 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overriden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = l2fwd-headroom + +# all source are stored in SRCS-y +SRCS-y := main.c + + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/l2fwd-headroom/main.c b/examples/l2fwd-headroom/main.c new file mode 100644 index 000..d7e557d --- /dev/null +++ b/examples/l2fwd-headroom/main.c @@ -0,0 +1,1040 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contribu
[dpdk-dev] [PATCH v5 3/3] MAINTAINERS: claim responsibility for headroom library and example app
Signed-off-by: Pawel Wodkowski --- MAINTAINERS | 4 1 file changed, 4 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index a771fa3..782b585 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -362,6 +362,10 @@ F: app/test/test_timer* F: examples/timer/ F: doc/guides/sample_app_ug/timer.rst +Headroom +M: Pawel Wodkowski +F: lib/librte_headroom/ +F: examples/l2fwd-headroom/ Test Applications - -- 1.9.1
[dpdk-dev] [PATCH v4 0/7] Enable DCB in SRIOV mode for ixgbe driver
This patchset enables DCB in SRIOV (ETH_MQ_RX_VMDQ_DCB and ETH_MQ_TX_VMDQ_DCB) for each VF and PF for the ixgbe driver. As a side effect, this allows multiple TX queues to be used in a VF (8 if there are 16 or fewer VFs, or 4 if there are 32 or fewer VFs) when PFC is not enabled. PATCH v4 changes: - resend the patch, as the previous one was sent by mistake with a different one. PATCH v3 changes: - Rework patch to fit ixgbe RSS in VT mode changes. - move driver specific code from rte_ethdev.c to driver code. - fix ixgbe driver bug in enabling the VLAN filter in PF, discovered during testing. PATCH v2 changes: - Split patch for easier review. - Remove "pmd: add api version negotiation for ixgbe driver" and "pmd: extend mailbox api to report number of RX/TX queues" patches as those are already merged from another patch Pawel Wodkowski (7): ethdev: Allow zero rx/tx queues in SRIOV mode pmd igb: fix VMDQ mode checking pmd: igb/ixgbe split nb_q_per_pool to rx and tx nb_q_per_pool move rte_eth_dev_check_mq_mode() logic to ixgbe driver pmd ixgbe: enable DCB in SRIOV tespmd: fix DCB in SRIOV mode support pmd ixgbe: fix vlan setting in in PF app/test-pmd/cmdline.c | 4 +- app/test-pmd/testpmd.c | 39 +-- app/test-pmd/testpmd.h | 10 -- lib/librte_ether/rte_ethdev.c | 212 ++ lib/librte_ether/rte_ethdev.h | 3 +- lib/librte_pmd_e1000/igb_ethdev.c | 45 +++- lib/librte_pmd_e1000/igb_pf.c | 3 +- lib/librte_pmd_e1000/igb_rxtx.c | 2 +- lib/librte_pmd_ixgbe/ixgbe_ethdev.c | 126 ++--- lib/librte_pmd_ixgbe/ixgbe_ethdev.h | 5 +- lib/librte_pmd_ixgbe/ixgbe_pf.c | 220 +++- lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 18 +-- 12 files changed, 407 insertions(+), 280 deletions(-) -- 1.9.1
[dpdk-dev] [PATCH v4 1/7] ethdev: Allow zero rx/tx queues in SRIOV mode
Allow zero rx/tx queues to be passed to rte_eth_dev_configure(). This way PF might be used only for configuration purpose when no receive and/or transmit functionality is needed. Rationale: in SRIOV mode PF use first free VF to RX/TX (at least ixgbe based NICs). For example: if using 82599EB based NIC and VF count is 16, 32 or 64 all recources are assigned to VFs so PF might be used only for configuration purpose. Signed-off-by: Pawel Wodkowski --- lib/librte_ether/rte_ethdev.c | 15 +++ 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c index ea3a1fb..2e814db 100644 --- a/lib/librte_ether/rte_ethdev.c +++ b/lib/librte_ether/rte_ethdev.c @@ -333,7 +333,7 @@ rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues) dev->data->rx_queues = rte_zmalloc("ethdev->rx_queues", sizeof(dev->data->rx_queues[0]) * nb_queues, RTE_CACHE_LINE_SIZE); - if (dev->data->rx_queues == NULL) { + if (dev->data->rx_queues == NULL && nb_queues > 0) { dev->data->nb_rx_queues = 0; return -(ENOMEM); } @@ -475,7 +475,7 @@ rte_eth_dev_tx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues) dev->data->tx_queues = rte_zmalloc("ethdev->tx_queues", sizeof(dev->data->tx_queues[0]) * nb_queues, RTE_CACHE_LINE_SIZE); - if (dev->data->tx_queues == NULL) { + if (dev->data->tx_queues == NULL && nb_queues > 0) { dev->data->nb_tx_queues = 0; return -(ENOMEM); } @@ -731,7 +731,10 @@ rte_eth_dev_configure(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, } if (nb_rx_q == 0) { PMD_DEBUG_TRACE("ethdev port_id=%d nb_rx_q == 0\n", port_id); - return (-EINVAL); + /* In SRIOV there can be no free resource for PF. So permit use only +* for configuration. 
*/ + if (RTE_ETH_DEV_SRIOV(dev).active == 0) + return (-EINVAL); } if (nb_tx_q > dev_info.max_tx_queues) { @@ -739,9 +742,13 @@ rte_eth_dev_configure(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, port_id, nb_tx_q, dev_info.max_tx_queues); return (-EINVAL); } + if (nb_tx_q == 0) { PMD_DEBUG_TRACE("ethdev port_id=%d nb_tx_q == 0\n", port_id); - return (-EINVAL); + /* In SRIOV there can be no free resource for PF. So permit use only +* for configuration. */ + if (RTE_ETH_DEV_SRIOV(dev).active == 0) + return (-EINVAL); } /* Copy the dev_conf parameter into the dev structure */ -- 1.9.1
[dpdk-dev] [PATCH v4 2/7] pmd igb: fix VMDQ mode checking
RX mode is an enum created by ORing flags. Change compare by value to test a flag when enabling/disabling VLAN filtering during RX queue setup. Signed-off-by: Pawel Wodkowski --- lib/librte_pmd_e1000/igb_ethdev.c | 2 +- lib/librte_pmd_e1000/igb_rxtx.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/librte_pmd_e1000/igb_ethdev.c b/lib/librte_pmd_e1000/igb_ethdev.c index 2a268b8..d451086 100644 --- a/lib/librte_pmd_e1000/igb_ethdev.c +++ b/lib/librte_pmd_e1000/igb_ethdev.c @@ -816,7 +816,7 @@ eth_igb_start(struct rte_eth_dev *dev) ETH_VLAN_EXTEND_MASK; eth_igb_vlan_offload_set(dev, mask); - if (dev->data->dev_conf.rxmode.mq_mode == ETH_MQ_RX_VMDQ_ONLY) { + if ((dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_VMDQ_FLAG) != 0) { /* Enable VLAN filter since VMDq always use VLAN filter */ igb_vmdq_vlan_hw_filter_enable(dev); } diff --git a/lib/librte_pmd_e1000/igb_rxtx.c b/lib/librte_pmd_e1000/igb_rxtx.c index 5c394a9..79c458f 100644 --- a/lib/librte_pmd_e1000/igb_rxtx.c +++ b/lib/librte_pmd_e1000/igb_rxtx.c @@ -2150,7 +2150,7 @@ eth_igb_rx_init(struct rte_eth_dev *dev) (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); /* Make sure VLAN Filters are off. */ - if (dev->data->dev_conf.rxmode.mq_mode != ETH_MQ_RX_VMDQ_ONLY) + if ((dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_VMDQ_FLAG) == 0) rctl &= ~E1000_RCTL_VFE; /* Don't store bad packets. */ rctl &= ~E1000_RCTL_SBP; -- 1.9.1
[dpdk-dev] [PATCH v4 3/7] pmd: igb/ixgbe split nb_q_per_pool to rx and tx nb_q_per_pool
rx and tx number of queue might be different if RX and TX are configured in different mode. This allow to inform VF about proper number of queues. Signed-off-by: Pawel Wodkowski --- lib/librte_ether/rte_ethdev.c | 12 ++-- lib/librte_ether/rte_ethdev.h | 3 ++- lib/librte_pmd_e1000/igb_pf.c | 3 ++- lib/librte_pmd_ixgbe/ixgbe_ethdev.c | 2 +- lib/librte_pmd_ixgbe/ixgbe_pf.c | 9 + 5 files changed, 16 insertions(+), 13 deletions(-) diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c index 2e814db..4007054 100644 --- a/lib/librte_ether/rte_ethdev.c +++ b/lib/librte_ether/rte_ethdev.c @@ -520,7 +520,7 @@ rte_eth_dev_check_vf_rss_rxq_num(uint8_t port_id, uint16_t nb_rx_q) return -EINVAL; } - RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = nb_rx_q; + RTE_ETH_DEV_SRIOV(dev).nb_rx_q_per_pool = nb_rx_q; RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx = dev->pci_dev->max_vfs * nb_rx_q; @@ -567,7 +567,7 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, dev->data->dev_conf.rxmode.mq_mode); case ETH_MQ_RX_VMDQ_RSS: dev->data->dev_conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_RSS; - if (nb_rx_q <= RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool) + if (nb_rx_q <= RTE_ETH_DEV_SRIOV(dev).nb_rx_q_per_pool) if (rte_eth_dev_check_vf_rss_rxq_num(port_id, nb_rx_q) != 0) { PMD_DEBUG_TRACE("ethdev port_id=%d" " SRIOV active, invalid queue" @@ -580,8 +580,8 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, default: /* ETH_MQ_RX_VMDQ_ONLY or ETH_MQ_RX_NONE */ /* if nothing mq mode configure, use default scheme */ dev->data->dev_conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_ONLY; - if (RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool > 1) - RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = 1; + if (RTE_ETH_DEV_SRIOV(dev).nb_rx_q_per_pool > 1) + RTE_ETH_DEV_SRIOV(dev).nb_rx_q_per_pool = 1; break; } @@ -600,8 +600,8 @@ rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, } /* check valid queue number */ - if ((nb_rx_q > 
RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool) || - (nb_tx_q > RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool)) { + if ((nb_rx_q > RTE_ETH_DEV_SRIOV(dev).nb_tx_q_per_pool) || + (nb_tx_q > RTE_ETH_DEV_SRIOV(dev).nb_tx_q_per_pool)) { PMD_DEBUG_TRACE("ethdev port_id=%d SRIOV active, " "queue number must less equal to %d\n", port_id, RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool); diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h index 84160c3..af86401 100644 --- a/lib/librte_ether/rte_ethdev.h +++ b/lib/librte_ether/rte_ethdev.h @@ -1544,7 +1544,8 @@ struct rte_eth_dev { struct rte_eth_dev_sriov { uint8_t active; /**< SRIOV is active with 16, 32 or 64 pools */ - uint8_t nb_q_per_pool;/**< rx queue number per pool */ + uint8_t nb_rx_q_per_pool;/**< rx queue number per pool */ + uint8_t nb_tx_q_per_pool;/**< tx queue number per pool */ uint16_t def_vmdq_idx;/**< Default pool num used for PF */ uint16_t def_pool_q_idx; /**< Default pool queue start reg index */ }; diff --git a/lib/librte_pmd_e1000/igb_pf.c b/lib/librte_pmd_e1000/igb_pf.c index bc3816a..9d2f858 100644 --- a/lib/librte_pmd_e1000/igb_pf.c +++ b/lib/librte_pmd_e1000/igb_pf.c @@ -115,7 +115,8 @@ void igb_pf_host_init(struct rte_eth_dev *eth_dev) rte_panic("Cannot allocate memory for private VF data\n"); RTE_ETH_DEV_SRIOV(eth_dev).active = ETH_8_POOLS; - RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = nb_queue; + RTE_ETH_DEV_SRIOV(eth_dev).nb_rx_q_per_pool = nb_queue; + RTE_ETH_DEV_SRIOV(eth_dev).nb_tx_q_per_pool = nb_queue; RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx = vf_num; RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx = (uint16_t)(vf_num * nb_queue); diff --git a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c index d6d408e..02b9cda 100644 --- a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c +++ b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c @@ -3564,7 +3564,7 @@ static int ixgbe_set_vf_rate_limit(struct rte_eth_dev *dev, uint16_t vf, struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev-&
[dpdk-dev] [PATCH v4 5/7] pmd ixgbe: enable DCB in SRIOV
Enable DCB in SRIOV mode for ixgbe driver. To use DCB in VF PF must configure port as DCB + VMDQ and VF must configure port as DCB only. VF are not allowed to change DCB settings that are common to all ports like number of TC. Signed-off-by: Pawel Wodkowski --- lib/librte_pmd_ixgbe/ixgbe_ethdev.c | 2 +- lib/librte_pmd_ixgbe/ixgbe_pf.c | 19 --- lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 18 +++--- 3 files changed, 24 insertions(+), 15 deletions(-) diff --git a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c index 8e9da3b..7551bcc 100644 --- a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c +++ b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c @@ -1514,7 +1514,7 @@ ixgbe_dev_configure(struct rte_eth_dev *dev) if (conf->nb_queue_pools != ETH_16_POOLS && conf->nb_queue_pools != ETH_32_POOLS) { PMD_INIT_LOG(ERR, " VMDQ+DCB selected, " - "number of TX qqueue pools must be %d or %d\n", + "number of TX queue pools must be %d or %d\n", ETH_16_POOLS, ETH_32_POOLS); return (-EINVAL); } diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c b/lib/librte_pmd_ixgbe/ixgbe_pf.c index a7b9333..7c4afba 100644 --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c @@ -109,9 +109,12 @@ int ixgbe_pf_host_init(struct rte_eth_dev *eth_dev) /* Fill sriov structure using default configuration. */ retval = ixgbe_pf_configure_mq_sriov(eth_dev); if (retval != 0) { - if (retval < 0) - PMD_INIT_LOG(ERR, " Setting up SRIOV with default device " + if (retval < 0) { + PMD_INIT_LOG(ERR, "Setting up SRIOV with default device " "configuration should not fail. 
This is a BUG."); + return retval; + } + return 0; } @@ -652,7 +655,9 @@ ixgbe_get_vf_queues(struct rte_eth_dev *dev, uint32_t vf, uint32_t *msgbuf) { struct ixgbe_vf_info *vfinfo = *IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private); - uint32_t default_q = vf * RTE_ETH_DEV_SRIOV(dev).nb_tx_q_per_pool; + struct ixgbe_dcb_config *dcbinfo = + IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data->dev_private); + uint32_t default_q = RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx; /* Verify if the PF supports the mbox APIs version or not */ switch (vfinfo[vf].api_version) { @@ -670,10 +675,10 @@ ixgbe_get_vf_queues(struct rte_eth_dev *dev, uint32_t vf, uint32_t *msgbuf) /* Notify VF of default queue */ msgbuf[IXGBE_VF_DEF_QUEUE] = default_q; - /* -* FIX ME if it needs fill msgbuf[IXGBE_VF_TRANS_VLAN] -* for VLAN strip or VMDQ_DCB or VMDQ_DCB_RSS -*/ + if (dcbinfo->num_tcs.pg_tcs) + msgbuf[IXGBE_VF_TRANS_VLAN] = dcbinfo->num_tcs.pg_tcs; + else + msgbuf[IXGBE_VF_TRANS_VLAN] = 1; return 0; } diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c index e6766b3..2e3522c 100644 --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c @@ -3166,10 +3166,9 @@ void ixgbe_configure_dcb(struct rte_eth_dev *dev) /* check support mq_mode for DCB */ if ((dev_conf->rxmode.mq_mode != ETH_MQ_RX_VMDQ_DCB) && - (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB)) - return; - - if (dev->data->nb_rx_queues != ETH_DCB_NUM_QUEUES) + (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB) && + (dev_conf->txmode.mq_mode != ETH_MQ_TX_VMDQ_DCB) && + (dev_conf->txmode.mq_mode != ETH_MQ_TX_DCB)) return; /** Configure DCB hardware **/ @@ -3442,8 +3441,13 @@ ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev) ixgbe_config_vf_rss(dev); break; - /* FIXME if support DCB/RSS together with VMDq & SRIOV */ + /* +* DCB will be configured during port startup. 
+*/ case ETH_MQ_RX_VMDQ_DCB: + break; + + /* FIXME if support DCB+RSS together with VMDq & SRIOV */ case ETH_MQ_RX_VMDQ_DCB_RSS: PMD_INIT_LOG(ERR, "Could not support DCB with VMDq & SRIOV"); @@ -3488,8 +3492,8 @@ ixgbe_dev_mq_tx_configure(struct rte_eth_dev *dev) switch (RTE_ETH_DEV_SRIOV(dev).active) { /* -* SRIOV active scheme -* FIXME if support DCB together wi
[dpdk-dev] [PATCH v4 4/7] move rte_eth_dev_check_mq_mode() logic to driver
Function rte_eth_dev_check_mq_mode() is driver specific. It should be done in PF configuration phase. This patch move igb/ixgbe driver specific mq check and SRIOV configuration code to driver part. Also rewriting log messages to be shorter and more descriptive. Signed-off-by: Pawel Wodkowski --- lib/librte_ether/rte_ethdev.c | 197 --- lib/librte_pmd_e1000/igb_ethdev.c | 43 lib/librte_pmd_ixgbe/ixgbe_ethdev.c | 105 ++- lib/librte_pmd_ixgbe/ixgbe_ethdev.h | 5 +- lib/librte_pmd_ixgbe/ixgbe_pf.c | 202 +++- 5 files changed, 327 insertions(+), 225 deletions(-) diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c index 4007054..aa27e39 100644 --- a/lib/librte_ether/rte_ethdev.c +++ b/lib/librte_ether/rte_ethdev.c @@ -502,195 +502,6 @@ rte_eth_dev_tx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues) return (0); } -static int -rte_eth_dev_check_vf_rss_rxq_num(uint8_t port_id, uint16_t nb_rx_q) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - switch (nb_rx_q) { - case 1: - case 2: - RTE_ETH_DEV_SRIOV(dev).active = - ETH_64_POOLS; - break; - case 4: - RTE_ETH_DEV_SRIOV(dev).active = - ETH_32_POOLS; - break; - default: - return -EINVAL; - } - - RTE_ETH_DEV_SRIOV(dev).nb_rx_q_per_pool = nb_rx_q; - RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx = - dev->pci_dev->max_vfs * nb_rx_q; - - return 0; -} - -static int -rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, - const struct rte_eth_conf *dev_conf) -{ - struct rte_eth_dev *dev = &rte_eth_devices[port_id]; - - if (RTE_ETH_DEV_SRIOV(dev).active != 0) { - /* check multi-queue mode */ - if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) || - (dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB_RSS) || - (dev_conf->txmode.mq_mode == ETH_MQ_TX_DCB)) { - /* SRIOV only works in VMDq enable mode */ - PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8 - " SRIOV active, " - "wrong VMDQ mq_mode rx %u tx %u\n", - port_id, - dev_conf->rxmode.mq_mode, - dev_conf->txmode.mq_mode); - return (-EINVAL); - } - 
- switch (dev_conf->rxmode.mq_mode) { - case ETH_MQ_RX_VMDQ_DCB: - case ETH_MQ_RX_VMDQ_DCB_RSS: - /* DCB/RSS VMDQ in SRIOV mode, not implement yet */ - PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8 - " SRIOV active, " - "unsupported VMDQ mq_mode rx %u\n", - port_id, dev_conf->rxmode.mq_mode); - return (-EINVAL); - case ETH_MQ_RX_RSS: - PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8 - " SRIOV active, " - "Rx mq mode is changed from:" - "mq_mode %u into VMDQ mq_mode %u\n", - port_id, - dev_conf->rxmode.mq_mode, - dev->data->dev_conf.rxmode.mq_mode); - case ETH_MQ_RX_VMDQ_RSS: - dev->data->dev_conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_RSS; - if (nb_rx_q <= RTE_ETH_DEV_SRIOV(dev).nb_rx_q_per_pool) - if (rte_eth_dev_check_vf_rss_rxq_num(port_id, nb_rx_q) != 0) { - PMD_DEBUG_TRACE("ethdev port_id=%d" - " SRIOV active, invalid queue" - " number for VMDQ RSS, allowed" - " value are 1, 2 or 4\n", - port_id); - return -EINVAL; - } - break; - default: /* ETH_MQ_RX_VMDQ_ONLY or ETH_MQ_RX_NONE */ - /* if nothing mq mode configure, use default scheme */ - dev->data->dev_conf.rxmode.mq_mode = ETH_MQ_RX_VMDQ_ONLY; - if (RTE_ETH_DEV_SRIOV(dev).nb_rx_q_per_pool > 1
[dpdk-dev] [PATCH v4 6/7] tespmd: fix DCB in SRIOV mode support
This patch incorporate fixes to support DCB in SRIOV mode for testpmd. Signed-off-by: Pawel Wodkowski --- app/test-pmd/cmdline.c | 4 ++-- app/test-pmd/testpmd.c | 39 +-- app/test-pmd/testpmd.h | 10 -- 3 files changed, 31 insertions(+), 22 deletions(-) diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c index 4753bb4..1e30ca6 100644 --- a/app/test-pmd/cmdline.c +++ b/app/test-pmd/cmdline.c @@ -1964,9 +1964,9 @@ cmd_config_dcb_parsed(void *parsed_result, /* DCB in VT mode */ if (!strncmp(res->vt_en, "on",2)) - dcb_conf.dcb_mode = DCB_VT_ENABLED; + dcb_conf.vt_en = 1; else - dcb_conf.dcb_mode = DCB_ENABLED; + dcb_conf.vt_en = 0; if (!strncmp(res->pfc_en, "on",2)) { dcb_conf.pfc_en = 1; diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index 3aebea6..bdbf237 100644 --- a/app/test-pmd/testpmd.c +++ b/app/test-pmd/testpmd.c @@ -1766,7 +1766,8 @@ const uint16_t vlan_tags[] = { }; static int -get_eth_dcb_conf(struct rte_eth_conf *eth_conf, struct dcb_config *dcb_conf) +get_eth_dcb_conf(struct rte_eth_conf *eth_conf, struct dcb_config *dcb_conf, + uint16_t sriov) { uint8_t i; @@ -1774,7 +1775,7 @@ get_eth_dcb_conf(struct rte_eth_conf *eth_conf, struct dcb_config *dcb_conf) * Builds up the correct configuration for dcb+vt based on the vlan tags array * given above, and the number of traffic classes available for use. 
*/ - if (dcb_conf->dcb_mode == DCB_VT_ENABLED) { + if (dcb_conf->vt_en == 1) { struct rte_eth_vmdq_dcb_conf vmdq_rx_conf; struct rte_eth_vmdq_dcb_tx_conf vmdq_tx_conf; @@ -1791,9 +1792,17 @@ get_eth_dcb_conf(struct rte_eth_conf *eth_conf, struct dcb_config *dcb_conf) vmdq_rx_conf.pool_map[i].vlan_id = vlan_tags[ i ]; vmdq_rx_conf.pool_map[i].pools = 1 << (i % vmdq_rx_conf.nb_queue_pools); } - for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) { - vmdq_rx_conf.dcb_queue[i] = i; - vmdq_tx_conf.dcb_queue[i] = i; + + if (sriov == 0) { + for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) { + vmdq_rx_conf.dcb_queue[i] = i; + vmdq_tx_conf.dcb_queue[i] = i; + } + } else { + for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) { + vmdq_rx_conf.dcb_queue[i] = i % dcb_conf->num_tcs; + vmdq_tx_conf.dcb_queue[i] = i % dcb_conf->num_tcs; + } } /*set DCB mode of RX and TX of multiple queues*/ @@ -1851,22 +1860,32 @@ init_port_dcb_config(portid_t pid,struct dcb_config *dcb_conf) uint16_t nb_vlan; uint16_t i; - /* rxq and txq configuration in dcb mode */ - nb_rxq = 128; - nb_txq = 128; rx_free_thresh = 64; + rte_port = &ports[pid]; memset(&port_conf,0,sizeof(struct rte_eth_conf)); /* Enter DCB configuration status */ dcb_config = 1; nb_vlan = sizeof( vlan_tags )/sizeof( vlan_tags[ 0 ]); /*set configuration of DCB in vt mode and DCB in non-vt mode*/ - retval = get_eth_dcb_conf(&port_conf, dcb_conf); + retval = get_eth_dcb_conf(&port_conf, dcb_conf, rte_port->dev_info.max_vfs); + + /* rxq and txq configuration in dcb mode */ + nb_rxq = rte_port->dev_info.max_rx_queues; + nb_txq = rte_port->dev_info.max_tx_queues; + + if (rte_port->dev_info.max_vfs) { + if (port_conf.rxmode.mq_mode == ETH_MQ_RX_VMDQ_DCB) + nb_rxq /= port_conf.rx_adv_conf.vmdq_dcb_conf.nb_queue_pools; + + if (port_conf.txmode.mq_mode == ETH_MQ_TX_VMDQ_DCB) + nb_txq /= port_conf.tx_adv_conf.vmdq_dcb_tx_conf.nb_queue_pools; + } + if (retval < 0) return retval; - rte_port = &ports[pid]; memcpy(&rte_port->dev_conf, 
&port_conf,sizeof(struct rte_eth_conf)); rxtx_port_config(rte_port); diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h index 581130b..0ef3257 100644 --- a/app/test-pmd/testpmd.h +++ b/app/test-pmd/testpmd.h @@ -230,20 +230,10 @@ struct fwd_config { portid_t nb_fwd_ports;/**< Nb. of ports involved. */ }; -/** - * DCB mode enable - */ -enum dcb_mode_enable -{ - DCB_VT_ENABLED, - DCB_ENABLED -}; - /* * DCB general config info */ struct dcb_config { - enum dcb_mode_enable dcb_mode; uint8_t vt_en; enum rte_eth_nb_tcs num_tcs; uint8_t pfc_en; -- 1.9.1
[dpdk-dev] [PATCH v4 7/7] pmd ixgbe: fix vlan setting in in PF
The ixgbe_vlan_filter_set() function should use hw->mac.ops.set_vfta() to set VLAN filtering, as this is a generic function that handles both the non-SRIOV and SRIOV cases. The bug was discovered when issuing the testpmd command 'rx_vlan add VLAN PORT' for the PF: the requested VLAN was enabled but the pool mask was not set. Only the command 'rx_vlan add VLAN port PORT vf MASK' could enable the specified VLAN id for the PF. Signed-off-by: Pawel Wodkowski --- lib/librte_pmd_ixgbe/ixgbe_ethdev.c | 19 --- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c index 7551bcc..7aef0e8 100644 --- a/lib/librte_pmd_ixgbe/ixgbe_ethdev.c +++ b/lib/librte_pmd_ixgbe/ixgbe_ethdev.c @@ -1162,21 +1162,18 @@ ixgbe_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on) IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct ixgbe_vfta * shadow_vfta = IXGBE_DEV_PRIVATE_TO_VFTA(dev->data->dev_private); - uint32_t vfta; + struct rte_eth_dev_sriov *sriov = &RTE_ETH_DEV_SRIOV(dev); + u32 vind = sriov->active ? sriov->def_vmdq_idx : 0; + s32 ret_val; uint32_t vid_idx; - uint32_t vid_bit; - vid_idx = (uint32_t) ((vlan_id >> 5) & 0x7F); - vid_bit = (uint32_t) (1 << (vlan_id & 0x1F)); - vfta = IXGBE_READ_REG(hw, IXGBE_VFTA(vid_idx)); - if (on) - vfta |= vid_bit; - else - vfta &= ~vid_bit; - IXGBE_WRITE_REG(hw, IXGBE_VFTA(vid_idx), vfta); + ret_val = hw->mac.ops.set_vfta(hw, vlan_id, vind, on); + if (ret_val != IXGBE_SUCCESS) + return ret_val; + vid_idx = (uint32_t) ((vlan_id >> 5) & 0x7F); /* update local VFTA copy */ - shadow_vfta->vfta[vid_idx] = vfta; + shadow_vfta->vfta[vid_idx] = IXGBE_READ_REG(hw, IXGBE_VFTA(vid_idx)); return 0; } -- 1.9.1
[dpdk-dev] [PATCH 0/5] Fix issues reported by static analysis tool
Klocwork reports some issues against the current DPDK version. Most of them need only cosmetic code changes (changing the type of a variable or adding an explicit cast). One patch, related to the ring PMD, fixes a real memory leak. Pawel Wodkowski (5): rte_timer: fix invalid declaration of rte_timer_cb_t librte_kvargs: make rte_kvargs_free() be consistent with other "free()" functions pmd ring: fix possible memory leak during devinit cmdline: make parse_set_list() use size_t instead of int for low/high parameter Fix usage of fgets in various places lib/librte_cfgfile/rte_cfgfile.c| 2 +- lib/librte_cmdline/cmdline_parse_portlist.c | 4 ++-- lib/librte_eal/bsdapp/eal/eal.c | 2 +- lib/librte_eal/linuxapp/eal/eal_hugepage_info.c | 4 ++-- lib/librte_eal/linuxapp/eal/eal_memory.c| 2 +- lib/librte_eal/linuxapp/eal/eal_pci.c | 2 +- lib/librte_eal/linuxapp/eal/eal_timer.c | 2 +- lib/librte_kvargs/rte_kvargs.c | 4 lib/librte_kvargs/rte_kvargs.h | 3 ++- lib/librte_pmd_ring/rte_eth_ring.c | 6 +++--- lib/librte_pmd_virtio/virtio_ethdev.c | 2 +- lib/librte_power/rte_power_acpi_cpufreq.c | 10 +- lib/librte_timer/rte_timer.h| 4 ++-- 13 files changed, 26 insertions(+), 21 deletions(-) -- 1.9.1
[dpdk-dev] [PATCH 1/5] rte_timer: fix invalid declaration of rte_timer_cb_t
The declaration of a function pointer type should be typedef ret_type (*type_name)(args...), not typedef ret_type (type_name)(args...). Although the compiler treats both of them the same, static analysis tools like Klocwork complain about that. Signed-off-by: Pawel Wodkowski --- lib/librte_timer/rte_timer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/librte_timer/rte_timer.h b/lib/librte_timer/rte_timer.h index 4907cf5..327fe4b 100644 --- a/lib/librte_timer/rte_timer.h +++ b/lib/librte_timer/rte_timer.h @@ -115,7 +115,7 @@ struct rte_timer; /** * Callback function type for timer expiry. */ -typedef void (rte_timer_cb_t)(struct rte_timer *, void *); +typedef void (*rte_timer_cb_t)(struct rte_timer *, void *); #define MAX_SKIPLIST_DEPTH 10 @@ -128,7 +128,7 @@ struct rte_timer struct rte_timer *sl_next[MAX_SKIPLIST_DEPTH]; volatile union rte_timer_status status; /**< Status of timer. */ uint64_t period; /**< Period of timer (0 if not periodic). */ - rte_timer_cb_t *f; /**< Callback function. */ + rte_timer_cb_t f; /**< Callback function. */ void *arg; /**< Argument to callback function. */ }; -- 1.9.1
[dpdk-dev] [PATCH 2/5] librte_kvargs: make rte_kvargs_free() be consistent with other "free()" functions
It is desirable that all types of *_free() functions mimic the behaviour of the libc free() function, which does nothing if the given parameter is NULL. This patch adds this behaviour to rte_kvargs_free(). Signed-off-by: Pawel Wodkowski --- lib/librte_kvargs/rte_kvargs.c | 4 lib/librte_kvargs/rte_kvargs.h | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/librte_kvargs/rte_kvargs.c b/lib/librte_kvargs/rte_kvargs.c index 8bc1e46..c2dd051 100644 --- a/lib/librte_kvargs/rte_kvargs.c +++ b/lib/librte_kvargs/rte_kvargs.c @@ -174,8 +174,12 @@ rte_kvargs_process(const struct rte_kvargs *kvlist, void rte_kvargs_free(struct rte_kvargs *kvlist) { + if (!kvlist) + return; + if (kvlist->str != NULL) free(kvlist->str); + free(kvlist); } diff --git a/lib/librte_kvargs/rte_kvargs.h b/lib/librte_kvargs/rte_kvargs.h index ef4efab..ae9ae79 100644 --- a/lib/librte_kvargs/rte_kvargs.h +++ b/lib/librte_kvargs/rte_kvargs.h @@ -115,7 +115,8 @@ void rte_kvargs_free(struct rte_kvargs *kvlist); * * For each key/value association that matches the given key, calls the * handler function with the for a given arg_name passing the value on the - * dictionary for that key and a given extra argument. + * dictionary for that key and a given extra argument. If *kvlist* is NULL + * function does nothing. * * @param kvlist * The rte_kvargs structure -- 1.9.1
[dpdk-dev] [PATCH 3/5] pmd ring: fix possible memory leak during devinit
Free kvlist on function exit to avoid memory leak. Signed-off-by: Pawel Wodkowski --- lib/librte_pmd_ring/rte_eth_ring.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/librte_pmd_ring/rte_eth_ring.c b/lib/librte_pmd_ring/rte_eth_ring.c index a23e933..582a621 100644 --- a/lib/librte_pmd_ring/rte_eth_ring.c +++ b/lib/librte_pmd_ring/rte_eth_ring.c @@ -527,7 +527,7 @@ out: static int rte_pmd_ring_devinit(const char *name, const char *params) { - struct rte_kvargs *kvlist; + struct rte_kvargs *kvlist = NULL; int ret = 0; struct node_action_list *info = NULL; @@ -548,7 +548,7 @@ rte_pmd_ring_devinit(const char *name, const char *params) info = rte_zmalloc("struct node_action_list", sizeof(struct node_action_list) + (sizeof(struct node_action_pair) * ret), 0); if (!info) - goto out; + goto out_free; info->total = ret; info->list = (struct node_action_pair*)(info + 1); @@ -567,8 +567,8 @@ rte_pmd_ring_devinit(const char *name, const char *params) } out_free: + rte_kvargs_free(kvlist); rte_free(info); -out: return ret; } -- 1.9.1
[dpdk-dev] [PATCH 4/5] cmdline: make parse_set_list() use size_t instead of int for low/high parameter
Fix a warning reported by Klocwork about a size_t to int cast when passing parameters to parse_set_list(). This patch also fixes code formatting errors that trigger checkpatch.pl errors after generating the patch. Signed-off-by: Pawel Wodkowski --- lib/librte_cmdline/cmdline_parse_portlist.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/librte_cmdline/cmdline_parse_portlist.c b/lib/librte_cmdline/cmdline_parse_portlist.c index fc6c14e..9c1fe3e 100644 --- a/lib/librte_cmdline/cmdline_parse_portlist.c +++ b/lib/librte_cmdline/cmdline_parse_portlist.c @@ -78,7 +78,7 @@ struct cmdline_token_ops cmdline_token_portlist_ops = { }; static void -parse_set_list(cmdline_portlist_t * pl, int low, int high) +parse_set_list(cmdline_portlist_t *pl, size_t low, size_t high) { do { pl->map |= (1 << low++); @@ -86,7 +86,7 @@ parse_set_list(cmdline_portlist_t * pl, int low, int high) } static int -parse_ports(cmdline_portlist_t * pl, const char * str) +parse_ports(cmdline_portlist_t *pl, const char *str) { size_t ps, pe; const char *first, *last; -- 1.9.1
[dpdk-dev] [PATCH 5/5] Fix usage of fgets in various places
Declaration of fgets() is char *fgets(char *str, int size, FILE *stream); Klocwork complain about passing "sizeof()" as size parameter since implicit casting size_t to int might cause loss of precision. Signed-off-by: Pawel Wodkowski --- lib/librte_cfgfile/rte_cfgfile.c| 2 +- lib/librte_eal/bsdapp/eal/eal.c | 2 +- lib/librte_eal/linuxapp/eal/eal_hugepage_info.c | 4 ++-- lib/librte_eal/linuxapp/eal/eal_memory.c| 2 +- lib/librte_eal/linuxapp/eal/eal_pci.c | 2 +- lib/librte_eal/linuxapp/eal/eal_timer.c | 2 +- lib/librte_pmd_virtio/virtio_ethdev.c | 2 +- lib/librte_power/rte_power_acpi_cpufreq.c | 10 +- 8 files changed, 13 insertions(+), 13 deletions(-) diff --git a/lib/librte_cfgfile/rte_cfgfile.c b/lib/librte_cfgfile/rte_cfgfile.c index b81c273..15ef447 100644 --- a/lib/librte_cfgfile/rte_cfgfile.c +++ b/lib/librte_cfgfile/rte_cfgfile.c @@ -107,7 +107,7 @@ rte_cfgfile_load(const char *filename, int flags) memset(cfg->sections, 0, sizeof(cfg->sections[0]) * allocated_sections); - while (fgets(buffer, sizeof(buffer), f) != NULL) { + while (fgets(buffer, (int)sizeof(buffer), f) != NULL) { char *pos = NULL; size_t len = strnlen(buffer, sizeof(buffer)); lineno++; diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c index 69f3c03..ca51868 100644 --- a/lib/librte_eal/bsdapp/eal/eal.c +++ b/lib/librte_eal/bsdapp/eal/eal.c @@ -134,7 +134,7 @@ eal_parse_sysfs_value(const char *filename, unsigned long *val) return -1; } - if (fgets(buf, sizeof(buf), f) == NULL) { + if (fgets(buf, (int)sizeof(buf), f) == NULL) { RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n", __func__, filename); fclose(f); diff --git a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c index 590cb56..551472c 100644 --- a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c +++ b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c @@ -115,7 +115,7 @@ get_default_hp_size(void) FILE *fd = fopen(proc_meminfo, "r"); if (fd == NULL) 
rte_panic("Cannot open %s\n", proc_meminfo); - while(fgets(buffer, sizeof(buffer), fd)){ + while (fgets(buffer, (int)sizeof(buffer), fd)) { if (strncmp(buffer, str_hugepagesz, hugepagesz_len) == 0){ size = rte_str_to_size(&buffer[hugepagesz_len]); break; @@ -155,7 +155,7 @@ get_hugepage_dir(uint64_t hugepage_sz) if (default_size == 0) default_size = get_default_hp_size(); - while (fgets(buf, sizeof(buf), fd)){ + while (fgets(buf, (int)sizeof(buf), fd)) { if (rte_strsplit(buf, sizeof(buf), splitstr, _FIELDNAME_MAX, split_tok) != _FIELDNAME_MAX) { RTE_LOG(ERR, EAL, "Error parsing %s\n", proc_mounts); diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c index a67a1b0..0c7f8ce 100644 --- a/lib/librte_eal/linuxapp/eal/eal_memory.c +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c @@ -614,7 +614,7 @@ find_numasocket(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) "%s/%s", hpi->hugedir, internal_config.hugefile_prefix); /* parse numa map */ - while (fgets(buf, sizeof(buf), f) != NULL) { + while (fgets(buf, (int)sizeof(buf), f) != NULL) { /* ignore non huge page */ if (strstr(buf, " huge ") == NULL && diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c index 63bcbce..ee4e1d8 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c @@ -162,7 +162,7 @@ pci_parse_sysfs_resource(const char *filename, struct rte_pci_device *dev) for (i = 0; i
[dpdk-dev] [PATCH v5 0/3] new headroom stats library and example application
On 2015-02-23 17:04, Thomas Monjalon wrote: >>> Do you mean that you plan to add some features to this library? >>> > >Is it going to stay at providing some stats or could you make some >>> > >actions >>> > >like time-sharing helpers? >> >What do you mean here saying time-sharing? > I mean helpers to stop processing at a defined rate in order to share CPU. > I am not sure if we are talking about the same thing, but this is already provided by the period field in struct rte_headroom_job (or whatever it will be called in the next version). This field is a hint for the application and allows it to execute jobs when needed. If the application decides that there is no time to execute some jobs, it can skip them, but that is up to the application. The job stats + period hint, together with the ability to skip or invoke jobs at any point and even to decide dynamically whether a given part of the code is currently considered a job, are the added value of this library. Each of the above on its own has limited use. -- Pawel
[dpdk-dev] [PATCH 3/5] pmd ring: fix possible memory leak during devinit
On 2015-02-24 12:05, Olivier MATZ wrote: > On 02/23/2015 03:09 PM, Pawel Wodkowski wrote: >> Free kvlist on function exit to avoid memory leak. >> >> Signed-off-by: Pawel Wodkowski > > Acked-by: Olivier Matz > Please keep in mind that this patch depends on patch 2/5. -- Pawel
[dpdk-dev] [PATCH v6 0/3] new rte_jobstats library and example application
Hi community, I would like to introduce a library for measuring the load of arbitrary jobs and for helping to find the optimal poll time in poll mode applications. It can be used to measure and drive every kind of job set on any arbitrary execution unit or tasking library. In the provided l2fwd-jobstats example I demonstrate how to use this library to select the optimal rx burst poll time and find out the idle time. Jobs are selected by using existing rte_timer library calls. This example does not limit the possible schemes in which this library can be used. PATCH v6 changes: - rename library to rte_jobstats. - clean unused includes and dependencies in library. - change/fix API documentation. - reword cover letter. PATCH v5 changes: - Fix spelling and checkpatch.pl errors. - Add maintainer claim for library and example app. PATCH v4 changes: - use proper branch for generating patch. PATCH v3 changes: - Fix spelling. PATCH v2 changes: - Remove jobs management/callback from library to not duplicate tasking library behaviour. - Cleanup/remove useless statistics. - Rework example application to use rte_timer library for jobs selection. - Introduce new app parameter '-l' for automatic thousands separators in stats. - More readable statistics format. 
Pawel Wodkowski (3): librte_jobstats: New library for checking core/system/app load examples: introduce new l2fwd-jobstats example MAINTAINERS: claim responsibility for rte_jobstats library and example app MAINTAINERS |4 + config/common_bsdapp |5 + config/common_linuxapp |5 + doc/api/doxy-api.conf|1 + examples/Makefile|1 + examples/l2fwd-jobstats/Makefile | 51 ++ examples/l2fwd-jobstats/main.c | 1040 ++ lib/Makefile |1 + lib/librte_jobstats/Makefile | 53 ++ lib/librte_jobstats/rte_jobstats.c | 273 +++ lib/librte_jobstats/rte_jobstats.h | 322 lib/librte_jobstats/rte_jobstats_version.map | 19 + mk/rte.app.mk|4 + 13 files changed, 1779 insertions(+) create mode 100644 examples/l2fwd-jobstats/Makefile create mode 100644 examples/l2fwd-jobstats/main.c create mode 100644 lib/librte_jobstats/Makefile create mode 100644 lib/librte_jobstats/rte_jobstats.c create mode 100644 lib/librte_jobstats/rte_jobstats.h create mode 100644 lib/librte_jobstats/rte_jobstats_version.map -- 1.9.1
[dpdk-dev] [PATCH v6 1/3] librte_jobstats: New library for checking core/system/app load
This library provides an API to measure the time spent in particular parts of code and to calculate the optimal polling time. To calculate those statistics, the application code needs to be divided into parts (called jobs) that do something. It is up to the application to decide what is considered a job. A series of jobs must be surrounded with the rte_jobstats_context_start() and rte_jobstats_context_finish() calls. After that, jobs may be started. Each job must be surrounded with rte_jobstats_start() and rte_jobstats_finish() calls. After a job finishes its execution, the period after which it should be called again is adjusted. It may be used to minimize the time wasted on unnecessary polls/calls. The adjustment is based on data provided by the job itself (e.g. the number of packets it processed). After all jobs in a series are executed, the following statistics are updated and may be used by the application. Statistics can be reset. Some of the provided statistics: - total/min/max execution - time spent executing jobs. - total/min/max management - time spent outside the execution area. This value may be used to measure the overhead of scheduling jobs. This time also contains the overhead of the rte_jobstats library itself. - number of loops that executed at least one job - executed jobs - time when statistics were reset. Each job provides total/min/max execution time and execution count statistics. 
Signed-off-by: Pawel Wodkowski --- config/common_bsdapp | 5 + config/common_linuxapp | 5 + doc/api/doxy-api.conf| 1 + lib/Makefile | 1 + lib/librte_jobstats/Makefile | 53 + lib/librte_jobstats/rte_jobstats.c | 273 +++ lib/librte_jobstats/rte_jobstats.h | 322 +++ lib/librte_jobstats/rte_jobstats_version.map | 19 ++ 8 files changed, 679 insertions(+) create mode 100644 lib/librte_jobstats/Makefile create mode 100644 lib/librte_jobstats/rte_jobstats.c create mode 100644 lib/librte_jobstats/rte_jobstats.h create mode 100644 lib/librte_jobstats/rte_jobstats_version.map diff --git a/config/common_bsdapp b/config/common_bsdapp index 57bacb8..86dc329 100644 --- a/config/common_bsdapp +++ b/config/common_bsdapp @@ -282,6 +282,11 @@ CONFIG_RTE_LIBRTE_HASH=y CONFIG_RTE_LIBRTE_HASH_DEBUG=n # +# Compile librte_jobstats +# +CONFIG_RTE_LIBRTE_JOBSTATS=y + +# # Compile librte_lpm # CONFIG_RTE_LIBRTE_LPM=y diff --git a/config/common_linuxapp b/config/common_linuxapp index d428f84..6cfadef 100644 --- a/config/common_linuxapp +++ b/config/common_linuxapp @@ -290,6 +290,11 @@ CONFIG_RTE_LIBRTE_HASH=y CONFIG_RTE_LIBRTE_HASH_DEBUG=n # +# Compile librte_jobstats +# +CONFIG_RTE_LIBRTE_JOBSTATS=y + +# # Compile librte_lpm # CONFIG_RTE_LIBRTE_LPM=y diff --git a/doc/api/doxy-api.conf b/doc/api/doxy-api.conf index 27c782c..8a6a5e6 100644 --- a/doc/api/doxy-api.conf +++ b/doc/api/doxy-api.conf @@ -37,6 +37,7 @@ INPUT = doc/api/doxy-api-index.md \ lib/librte_ether \ lib/librte_hash \ lib/librte_ip_frag \ + lib/librte_jobstats \ lib/librte_kni \ lib/librte_kvargs \ lib/librte_lpm \ diff --git a/lib/Makefile b/lib/Makefile index d617d81..42ffe2f 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -58,6 +58,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_LPM) += librte_lpm DIRS-$(CONFIG_RTE_LIBRTE_ACL) += librte_acl DIRS-$(CONFIG_RTE_LIBRTE_NET) += librte_net DIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += librte_ip_frag +DIRS-$(CONFIG_RTE_LIBRTE_JOBSTATS) += librte_jobstats DIRS-$(CONFIG_RTE_LIBRTE_POWER) += librte_power 
DIRS-$(CONFIG_RTE_LIBRTE_METER) += librte_meter DIRS-$(CONFIG_RTE_LIBRTE_SCHED) += librte_sched diff --git a/lib/librte_jobstats/Makefile b/lib/librte_jobstats/Makefile new file mode 100644 index 000..136a448 --- /dev/null +++ b/lib/librte_jobstats/Makefile @@ -0,0 +1,53 @@ +# BSD LICENSE +# +# Copyright(c) 2015 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT
[dpdk-dev] [PATCH v6 2/3] examples: introduce new l2fwd-jobstats example
This app demonstrates usage of the new rte_jobstats library. It is basically the original l2fwd with the following modifications to meet library requirements: - main_loop() was split into two jobs: forward job and flush job. Logic for those jobs is almost the same as in the original application. - stats are moved to an rte_alarm callback to avoid introducing the overhead of printing. - stats are expanded to show rte_jobstats statistics. - added new parameter '-l' for automatic thousands separators. Comparing the original l2fwd and l2fwd-jobstats apps shows what is needed to properly write your own application with rte_jobstats measurements. New available statistics: - Total and % of fwd and flush execution time - management time - overhead of rte_timer + overhead of rte_jobstats library - Idle time and % of time spent waiting for fwd or flush to be ready to execute. - per job execution time and period. Signed-off-by: Pawel Wodkowski --- examples/Makefile|1 + examples/l2fwd-jobstats/Makefile | 51 ++ examples/l2fwd-jobstats/main.c | 1040 ++ mk/rte.app.mk|4 + 4 files changed, 1096 insertions(+) create mode 100644 examples/l2fwd-jobstats/Makefile create mode 100644 examples/l2fwd-jobstats/main.c diff --git a/examples/Makefile b/examples/Makefile index 81f1d2f..e847ded 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -50,6 +50,7 @@ DIRS-$(CONFIG_RTE_MBUF_REFCNT) += ip_fragmentation DIRS-$(CONFIG_RTE_MBUF_REFCNT) += ipv4_multicast DIRS-$(CONFIG_RTE_LIBRTE_KNI) += kni DIRS-y += l2fwd +DIRS-y += l2fwd-jobstats DIRS-$(CONFIG_RTE_LIBRTE_IVSHMEM) += l2fwd-ivshmem DIRS-y += l3fwd DIRS-$(CONFIG_RTE_LIBRTE_ACL) += l3fwd-acl diff --git a/examples/l2fwd-jobstats/Makefile b/examples/l2fwd-jobstats/Makefile new file mode 100644 index 000..d57a0ae --- /dev/null +++ b/examples/l2fwd-jobstats/Makefile @@ -0,0 +1,51 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2015 Intel Corporation. All rights reserved. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overriden by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = l2fwd-jobstats + +# all source are stored in SRCS-y +SRCS-y := main.c + + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/examples/l2fwd-jobstats/main.c b/examples/l2fwd-jobstats/main.c new file mode 100644 index 000..a5a1aaa --- /dev/null +++ b/examples/l2fwd-jobstats/main.c @@ -0,0 +1,1040 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + *
[dpdk-dev] [PATCH v6 3/3] MAINTAINERS: claim responsibility for rte_jobstats library and example app
Signed-off-by: Pawel Wodkowski --- MAINTAINERS | 4 1 file changed, 4 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index a771fa3..7b3ef00 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -362,6 +362,10 @@ F: app/test/test_timer* F: examples/timer/ F: doc/guides/sample_app_ug/timer.rst +Job stats +M: Pawel Wodkowski +F: lib/librte_jobstats/ +F: examples/l2fwd-jobstats/ Test Applications - -- 1.9.1
[dpdk-dev] Cannot compile l2fwd-jobstats example
On 2015-02-25 03:26, Tetsuya Mukawa wrote: > Hi, > > I cannot compile l2fwd-jobstats using master branch. > Here is log > > $ T=x86_64-native-linuxapp-gcc make examples > == Build examples for x86_64-native-linuxapp-gcc > == bond > == cmdline > == distributor > == exception_path > == helloworld > == ip_pipeline > == ip_reassembly > == ipv4_multicast > == kni > == l2fwd > == l2fwd-jobstats > make: *** l2fwd-jobstats: No such file or directory. Stop. > make[2]: *** [l2fwd-jobstats] Error 2 > make[1]: *** [x86_64-native-linuxapp-gcc_examples] Error 2 > make: *** [examples] Error 2 > > > As a result of bisecting, it seems after applying below commit, this > error can be seen. > > commit 2caeb8c0141dcf488f2d68aa8e8c44d1f85ed28b > Author: Pawel Wodkowski > Date: Tue Feb 24 17:33:24 2015 +0100 > > examples/l2fwd-jobstats: new example > > > Thanks, > Tetsuya > Looking at the git log, two files are missing there: examples/l2fwd-jobstats/Makefile examples/l2fwd-jobstats/main.c from patch http://dpdk.org/ml/archives/dev/2015-February/014107.html -- Pawel
[dpdk-dev] [PATCH v4 3/7] pmd: igb/ixgbe split nb_q_per_pool to rx and tx nb_q_per_pool
On 2015-02-25 04:24, Ouyang, Changchun wrote: > > >> -Original Message- >> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Pawel Wodkowski >> Sent: Thursday, February 19, 2015 11:55 PM >> To: dev at dpdk.org >> Subject: [dpdk-dev] [PATCH v4 3/7] pmd: igb/ixgbe split nb_q_per_pool to rx >> and tx nb_q_per_pool >> [...] >> >> /* check valid queue number */ >> -if ((nb_rx_q > RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool) || >> -(nb_tx_q > RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool)) { >> +if ((nb_rx_q > RTE_ETH_DEV_SRIOV(dev).nb_tx_q_per_pool) > > Here, how about use nb_rx_q_per_pool to replace nb_tx_q_per_pool ? > so it will be more clear to check rx queue number. Yes, this should be nb_rx_q_per_pool. I missed this, because in next patch I moved this and corrected "on the fly" :). I will correct this in next version. -- Pawel
[dpdk-dev] [PATCH v4 4/7] move rte_eth_dev_check_mq_mode() logic to driver
On 2015-02-25 07:14, Ouyang, Changchun wrote: > > >> -Original Message- >> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Pawel Wodkowski >> Sent: Thursday, February 19, 2015 11:55 PM >> To: dev at dpdk.org >> Subject: [dpdk-dev] [PATCH v4 4/7] move rte_eth_dev_check_mq_mode() >> logic to driver >> >> Function rte_eth_dev_check_mq_mode() is driver specific. It should be >> done in PF configuration phase. This patch move igb/ixgbe driver specific mq >> check and SRIOV configuration code to driver part. Also rewriting log >> messages to be shorter and more descriptive. >> >> Signed-off-by: Pawel Wodkowski >> --- >> lib/librte_ether/rte_ethdev.c | 197 >> --- >> lib/librte_pmd_e1000/igb_ethdev.c | 43 >> lib/librte_pmd_ixgbe/ixgbe_ethdev.c | 105 ++- >> lib/librte_pmd_ixgbe/ixgbe_ethdev.h | 5 +- >> lib/librte_pmd_ixgbe/ixgbe_pf.c | 202 >> +++- >> 5 files changed, 327 insertions(+), 225 deletions(-) >> >> diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c >> index 4007054..aa27e39 100644 >> --- a/lib/librte_ether/rte_ethdev.c >> +++ b/lib/librte_ether/rte_ethdev.c >> @@ -502,195 +502,6 @@ rte_eth_dev_tx_queue_config(struct rte_eth_dev >> *dev, uint16_t nb_queues) >> return (0); >> } >> >> -static int >> -rte_eth_dev_check_vf_rss_rxq_num(uint8_t port_id, uint16_t nb_rx_q) -{ >> -struct rte_eth_dev *dev = &rte_eth_devices[port_id]; >> -switch (nb_rx_q) { >> -case 1: >> -case 2: >> -RTE_ETH_DEV_SRIOV(dev).active = >> -ETH_64_POOLS; >> -break; >> -case 4: >> -RTE_ETH_DEV_SRIOV(dev).active = >> -ETH_32_POOLS; >> -break; >> -default: >> -return -EINVAL; >> -} >> - >> -RTE_ETH_DEV_SRIOV(dev).nb_rx_q_per_pool = nb_rx_q; >> -RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx = >> -dev->pci_dev->max_vfs * nb_rx_q; >> - >> -return 0; >> -} >> - >> -static int >> -rte_eth_dev_check_mq_mode(uint8_t port_id, uint16_t nb_rx_q, uint16_t >> nb_tx_q, >> - const struct rte_eth_conf *dev_conf) >> -{ >> -struct rte_eth_dev *dev = &rte_eth_devices[port_id]; >> - >> -if 
(RTE_ETH_DEV_SRIOV(dev).active != 0) { >> -/* check multi-queue mode */ >> -if ((dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB) || >> -(dev_conf->rxmode.mq_mode == ETH_MQ_RX_DCB_RSS) >> || >> -(dev_conf->txmode.mq_mode == ETH_MQ_TX_DCB)) { >> -/* SRIOV only works in VMDq enable mode */ >> -PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8 >> -" SRIOV active, " >> -"wrong VMDQ mq_mode rx %u >> tx %u\n", >> -port_id, >> -dev_conf->rxmode.mq_mode, >> -dev_conf->txmode.mq_mode); >> -return (-EINVAL); >> -} >> - >> -switch (dev_conf->rxmode.mq_mode) { >> -case ETH_MQ_RX_VMDQ_DCB: >> -case ETH_MQ_RX_VMDQ_DCB_RSS: >> -/* DCB/RSS VMDQ in SRIOV mode, not implement >> yet */ >> -PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8 >> -" SRIOV active, " >> -"unsupported VMDQ mq_mode >> rx %u\n", >> -port_id, dev_conf- >>> rxmode.mq_mode); >> -return (-EINVAL); >> -case ETH_MQ_RX_RSS: >> -PMD_DEBUG_TRACE("ethdev port_id=%" PRIu8 >> -" SRIOV active, " >> -"Rx mq mode is changed from:" >> -"mq_mode %u into VMDQ >> mq_mode %u\n", >> -port_id, >> -dev_conf->rxmode.mq_mode, >> -dev->data- >>> dev_conf.rxmode.mq_mode); >> -case ETH_MQ_RX_VMDQ_RSS: >> -
[dpdk-dev] [PATCH v4 5/7] pmd ixgbe: enable DCB in SRIOV
On 2015-02-25 04:36, Ouyang, Changchun wrote: >> @@ -652,7 +655,9 @@ ixgbe_get_vf_queues(struct rte_eth_dev *dev, >> >uint32_t vf, uint32_t *msgbuf) { >> >struct ixgbe_vf_info *vfinfo = >> >*IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data- >>> > >dev_private); >> >- uint32_t default_q = vf * >> >RTE_ETH_DEV_SRIOV(dev).nb_tx_q_per_pool; >> >+ struct ixgbe_dcb_config *dcbinfo = >> >+ IXGBE_DEV_PRIVATE_TO_DCB_CFG(dev->data- >>> > >dev_private); >> >+ uint32_t default_q = RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx; > Why need change the default_q here? > Because this field holds default queue index. -- Pawel
[dpdk-dev] [PATCH v2 0/4] Fix issues reported by static analysis tool
Static analysis reports some issues against the current DPDK version. Most of them need only cosmetic code changes (changing the type of a variable). One patch, related to the ring PMD, fixes a real memory leak. PATCH v2 changes: - remove patch 5/5 as it was NACKed - reword commit log according to mailing list suggestions. Pawel Wodkowski (4): rte_timer: change declaration of rte_timer_cb_t librte_kvargs: make rte_kvargs_free() be consistent with other "free()" functions pmd ring: fix possible memory leak during devinit cmdline: make parse_set_list() use size_t instead of int for low/high parameter lib/librte_cmdline/cmdline_parse_portlist.c | 4 ++-- lib/librte_kvargs/rte_kvargs.c | 4 lib/librte_kvargs/rte_kvargs.h | 3 ++- lib/librte_pmd_ring/rte_eth_ring.c | 6 +++--- lib/librte_timer/rte_timer.h| 4 ++-- 5 files changed, 13 insertions(+), 8 deletions(-) -- 1.9.1
[dpdk-dev] [PATCH v2 1/4] rte_timer: change declaration of rte_timer_cb_t
This patch removes the inconsistency between the declaration of type rte_timer_cb_t, field f in struct rte_timer and function __rte_timer_reset(). Although the compiler treats both of them the same, static analysis tools complain about that. Signed-off-by: Pawel Wodkowski --- lib/librte_timer/rte_timer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/librte_timer/rte_timer.h b/lib/librte_timer/rte_timer.h index 35b8719..77547c6 100644 --- a/lib/librte_timer/rte_timer.h +++ b/lib/librte_timer/rte_timer.h @@ -115,7 +115,7 @@ struct rte_timer; /** * Callback function type for timer expiry. */ -typedef void (rte_timer_cb_t)(struct rte_timer *, void *); +typedef void (*rte_timer_cb_t)(struct rte_timer *, void *); #define MAX_SKIPLIST_DEPTH 10 @@ -128,7 +128,7 @@ struct rte_timer struct rte_timer *sl_next[MAX_SKIPLIST_DEPTH]; volatile union rte_timer_status status; /**< Status of timer. */ uint64_t period; /**< Period of timer (0 if not periodic). */ - rte_timer_cb_t *f; /**< Callback function. */ + rte_timer_cb_t f; /**< Callback function. */ void *arg; /**< Argument to callback function. */ }; -- 1.9.1
[dpdk-dev] [PATCH v2 2/4] librte_kvargs: make rte_kvargs_free() be consistent with other "free()" functions
By convention, free() functions should ignore a NULL parameter. This patch adds this behaviour to rte_kvargs_free(). Signed-off-by: Pawel Wodkowski --- lib/librte_kvargs/rte_kvargs.c | 4 lib/librte_kvargs/rte_kvargs.h | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/librte_kvargs/rte_kvargs.c b/lib/librte_kvargs/rte_kvargs.c index 8bc1e46..c2dd051 100644 --- a/lib/librte_kvargs/rte_kvargs.c +++ b/lib/librte_kvargs/rte_kvargs.c @@ -174,8 +174,12 @@ rte_kvargs_process(const struct rte_kvargs *kvlist, void rte_kvargs_free(struct rte_kvargs *kvlist) { + if (!kvlist) + return; + if (kvlist->str != NULL) free(kvlist->str); + free(kvlist); } diff --git a/lib/librte_kvargs/rte_kvargs.h b/lib/librte_kvargs/rte_kvargs.h index ef4efab..ae9ae79 100644 --- a/lib/librte_kvargs/rte_kvargs.h +++ b/lib/librte_kvargs/rte_kvargs.h @@ -115,7 +115,8 @@ void rte_kvargs_free(struct rte_kvargs *kvlist); * * For each key/value association that matches the given key, calls the * handler function with the for a given arg_name passing the value on the - * dictionary for that key and a given extra argument. + * dictionary for that key and a given extra argument. If *kvlist* is NULL + * function does nothing. * * @param kvlist * The rte_kvargs structure -- 1.9.1
[dpdk-dev] [PATCH v2 3/4] pmd ring: fix possible memory leak during devinit
Free kvlist on function exit to avoid memory leak. Signed-off-by: Pawel Wodkowski --- lib/librte_pmd_ring/rte_eth_ring.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/librte_pmd_ring/rte_eth_ring.c b/lib/librte_pmd_ring/rte_eth_ring.c index a5dc71e..f049bb3 100644 --- a/lib/librte_pmd_ring/rte_eth_ring.c +++ b/lib/librte_pmd_ring/rte_eth_ring.c @@ -527,7 +527,7 @@ out: static int rte_pmd_ring_devinit(const char *name, const char *params) { - struct rte_kvargs *kvlist; + struct rte_kvargs *kvlist = NULL; int ret = 0; struct node_action_list *info = NULL; @@ -548,7 +548,7 @@ rte_pmd_ring_devinit(const char *name, const char *params) info = rte_zmalloc("struct node_action_list", sizeof(struct node_action_list) + (sizeof(struct node_action_pair) * ret), 0); if (!info) - goto out; + goto out_free; info->total = ret; info->list = (struct node_action_pair*)(info + 1); @@ -567,8 +567,8 @@ rte_pmd_ring_devinit(const char *name, const char *params) } out_free: + rte_kvargs_free(kvlist); rte_free(info); -out: return ret; } -- 1.9.1
[dpdk-dev] [PATCH v2 4/4] cmdline: make parse_set_list() use size_t instead of int for low/high parameter
Fix warning reported during static analysis about size_t to int cast when passing parameters to parse_set_list(). This patch also fixes code formatting errors that cause checkpatch.pl errors after generating the patch. Signed-off-by: Pawel Wodkowski --- lib/librte_cmdline/cmdline_parse_portlist.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/librte_cmdline/cmdline_parse_portlist.c b/lib/librte_cmdline/cmdline_parse_portlist.c index fc6c14e..9c1fe3e 100644 --- a/lib/librte_cmdline/cmdline_parse_portlist.c +++ b/lib/librte_cmdline/cmdline_parse_portlist.c @@ -78,7 +78,7 @@ struct cmdline_token_ops cmdline_token_portlist_ops = { }; static void -parse_set_list(cmdline_portlist_t * pl, int low, int high) +parse_set_list(cmdline_portlist_t *pl, size_t low, size_t high) { do { pl->map |= (1 << low++); @@ -86,7 +86,7 @@ parse_set_list(cmdline_portlist_t * pl, int low, int high) } static int -parse_ports(cmdline_portlist_t * pl, const char * str) +parse_ports(cmdline_portlist_t *pl, const char *str) { size_t ps, pe; const char *first, *last; -- 1.9.1
[dpdk-dev] [PATCH] af_packet: Fix some klocwork errors
On 2015-02-26 07:42, Ouyang Changchun wrote: > Fix possible memory leak issue: free kvlist before return; > Fix possible resource lost issue: close qssockfd before return; > > Signed-off-by: Changchun Ouyang > --- > lib/librte_pmd_af_packet/rte_eth_af_packet.c | 11 +-- > 1 file changed, 9 insertions(+), 2 deletions(-) > > diff --git a/lib/librte_pmd_af_packet/rte_eth_af_packet.c > b/lib/librte_pmd_af_packet/rte_eth_af_packet.c > index 80e9bdf..cf8f4fa 100644 > --- a/lib/librte_pmd_af_packet/rte_eth_af_packet.c > +++ b/lib/librte_pmd_af_packet/rte_eth_af_packet.c > @@ -694,6 +694,8 @@ error: > } > rte_free(*internals); > } > + if (qsockfd != -1) > + close(qsockfd); > return -1; > } > > @@ -822,16 +824,21 @@ rte_pmd_af_packet_devinit(const char *name, const char > *params) > > ret = rte_kvargs_process(kvlist, ETH_AF_PACKET_IFACE_ARG, >&open_packet_iface, &sockfd); > - if (ret < 0) > + if (ret < 0) { > + rte_kvargs_free(kvlist); > return -1; > + } > } > > ret = rte_eth_from_packet(name, &sockfd, numa_node, kvlist); > close(sockfd); /* no longer needed */ > > - if (ret < 0) > + if (ret < 0) { > + rte_kvargs_free(kvlist); > return -1; > + } > > + rte_kvargs_free(kvlist); > return 0; > } > > This patch is correct but it would be good to rework it to have common error exit point like in rte_pmd_init_internals() function you changed. -- Pawel
[dpdk-dev] [PATCH] eal: prevent dereferencing NULL pointer in rte_eal_devargs_add()
On failure devargs->args should not be accesed if devargs is NULL. Signed-off-by: Pawel Wodkowski --- lib/librte_eal/common/eal_common_devargs.c | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/librte_eal/common/eal_common_devargs.c b/lib/librte_eal/common/eal_common_devargs.c index 9b110f7..615945e 100644 --- a/lib/librte_eal/common/eal_common_devargs.c +++ b/lib/librte_eal/common/eal_common_devargs.c @@ -124,12 +124,13 @@ rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str) return 0; fail: - if (devargs->args) - free(devargs->args); if (buf) free(buf); - if (devargs) + if (devargs) { + free(devargs->args); free(devargs); + } + return -1; } -- 1.9.1
[dpdk-dev] [PATCH] A fix to work around strict-aliasing rules breaking
On 2015-03-02 11:32, Bruce Richardson wrote: > On Mon, Mar 02, 2015 at 05:03:50PM +0800, zhihong.wang at intel.com wrote: >> Fixed strict-aliasing rules breaking errors for some GCC version. >> > > This looks messy. Also, I believe the definition of memcpy should include > the "restrict" keyword to indicate that source and dest can't overlap. Might > that help fix the issue? > Is this error related with overlapping or casting 'void *' to 'uintXX_t *' that make compiler report aliasing rule breaking? > >> Signed-off-by: Zhihong Wang >> --- >> .../common/include/arch/x86/rte_memcpy.h | 44 >> -- >> 1 file changed, 24 insertions(+), 20 deletions(-) >> >> diff --git a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h >> b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h >> index 69a5c6f..f412099 100644 >> --- a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h >> +++ b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h >> @@ -195,6 +195,8 @@ rte_mov256blocks(uint8_t *dst, const uint8_t *src, >> size_t n) >> static inline void * >> rte_memcpy(void *dst, const void *src, size_t n) >> { >> +uintptr_t dstu = (uintptr_t)dst; >> +uintptr_t srcu = (uintptr_t)src; If so maybe using union here would be good solution or 'char *'. -- Pawel
[dpdk-dev] [PATCH] eal: prevent dereferencing NULL pointer in rte_eal_devargs_add()
On 2015-03-02 15:40, Wiles, Keith wrote: > > > On 3/2/15, 6:23 AM, "David Marchand" wrote: > >> Hello Pawel, >> >> On Mon, Mar 2, 2015 at 12:09 PM, Pawel Wodkowski >> >> wrote: >> >>> On failure devargs->args should not be accesed if devargs is NULL. >>> >> >> accessed. >> >> >>> >>> Signed-off-by: Pawel Wodkowski >>> --- >>> lib/librte_eal/common/eal_common_devargs.c | 7 --- >>> 1 file changed, 4 insertions(+), 3 deletions(-) >>> >>> diff --git a/lib/librte_eal/common/eal_common_devargs.c >>> b/lib/librte_eal/common/eal_common_devargs.c >>> index 9b110f7..615945e 100644 >>> --- a/lib/librte_eal/common/eal_common_devargs.c >>> +++ b/lib/librte_eal/common/eal_common_devargs.c >>> @@ -124,12 +124,13 @@ rte_eal_devargs_add(enum rte_devtype devtype, >>> const >>> char *devargs_str) >>> return 0; >>> >>> fail: >>> - if (devargs->args) >>> - free(devargs->args); >>> if (buf) >>> free(buf); >>> - if (devargs) >>> + if (devargs) { >>> + free(devargs->args); > > Do you not still need to check for args being NULL before calling free? No, there is no need for that. The same goes for buf. This no-op check is common practice in DPDK. It would be good to clean this up in the whole library in a separate patch set. I recommend reading the free() documentation before writing another 'if (foo != NULL) free(foo)' http://pubs.opengroup.org/onlinepubs/009695399/functions/free.html -- Pawel
[dpdk-dev] [PATCH] eal: prevent dereferencing NULL pointer in rte_eal_devargs_add()
On 2015-03-02 17:47, Wiles, Keith wrote: > > > On 3/2/15, 8:55 AM, "Wodkowski, PawelX" wrote: > >> On 2015-03-02 15:40, Wiles, Keith wrote: >>> >>> >>> On 3/2/15, 6:23 AM, "David Marchand" wrote: >>> >>>> Hello Pawel, >>>> >>>> On Mon, Mar 2, 2015 at 12:09 PM, Pawel Wodkowski >>>> >>>> wrote: >>>> >>>>> On failure devargs->args should not be accesed if devargs is NULL. >>>>> >>>> >>>> accessed. >>>> >>>> >>>>> >>>>> Signed-off-by: Pawel Wodkowski >>>>> --- >>>>>lib/librte_eal/common/eal_common_devargs.c | 7 --- >>>>>1 file changed, 4 insertions(+), 3 deletions(-) >>>>> >>>>> diff --git a/lib/librte_eal/common/eal_common_devargs.c >>>>> b/lib/librte_eal/common/eal_common_devargs.c >>>>> index 9b110f7..615945e 100644 >>>>> --- a/lib/librte_eal/common/eal_common_devargs.c >>>>> +++ b/lib/librte_eal/common/eal_common_devargs.c >>>>> @@ -124,12 +124,13 @@ rte_eal_devargs_add(enum rte_devtype devtype, >>>>> const >>>>> char *devargs_str) >>>>> return 0; >>>>> >>>>>fail: >>>>> - if (devargs->args) >>>>> - free(devargs->args); >>>>> if (buf) >>>>> free(buf); >>>>> - if (devargs) >>>>> + if (devargs) { >>>>> + free(devargs->args); >>> >>> Do you not still need to check for args being NULL before calling free? >> >> No, there is no need for that. The same for buf. This NOP check is >> common practice in DPDK. I woul be good to clean this in whole library >> in separate patch set. >> >> I recommend to read free() doc before doing another 'if (foo != NULL) >> free(foo)' >> >> http://pubs.opengroup.org/onlinepubs/009695399/functions/free.html > > OK, did not realize this was changed. Do we know if all of the OSes DPDK > is built supports this free style? > > I know that VxWorks did not support this free() method and I did port DPDK > to that OS, but it is not a supported platform for DPDK. > > If some OS does not support passing NULL (and is supported by DPDK) to > free, then we need to abstract the free into a macro to allow those > systems to work correctly. 
I would expect using a macro for free would > also help if all frees were reworked to not test for NULL. > This is standard C behaviour (since ANSI C?) and VxWorks claims to be compatible with it. If they lie, why bother? > ++Keith >> >> -- >> Pawel > -- Pawel
[dpdk-dev] [PATCH v2] librte_eal/common: Fix cast from pointer to integer of different size
On 2015-03-03 03:20, Michael Qiu wrote: > /i686-native-linuxapp-gcc/include/rte_memcpy.h:592:23: error: > cast from pointer to integer of different size > [-Werror=pointer-to-int-cast] > >dstofss = 16 - (int)((long long)(void *)dst & 0x0F) + 16; > > Type 'long long' is 64-bit in i686 platform while 'void *' > is 32-bit. > > Signed-off-by: Michael Qiu > --- > v2 --> v1: > Remove unnecessary casting (void *) > > lib/librte_eal/common/include/arch/x86/rte_memcpy.h | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h > b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h > index 7b2d382..85a5f4d 100644 > --- a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h > +++ b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h > @@ -589,12 +589,12 @@ COPY_BLOCK_64_BACK15: >* unaligned copy functions require up to 15 bytes >* backwards access. >*/ > - dstofss = 16 - (int)((long long)(void *)dst & 0x0F) + 16; > + dstofss = 16 - (int)((long)dst & 0x0F) + 16; > n -= dstofss; > rte_mov32((uint8_t *)dst, (const uint8_t *)src); > src = (const uint8_t *)src + dstofss; > dst = (uint8_t *)dst + dstofss; > - srcofs = (int)((long long)(const void *)src & 0x0F); > + srcofs = (int)((long)src & 0x0F); > > /** >* For aligned copy > I think you should use here size_t, (u)ptrdiff_t or (u)intptr_t as this will be more portable. Also type of dstofss and srcofs should be changed accordingly. -- Pawel
[dpdk-dev] [PATCH v2] librte_eal/common: Fix cast from pointer to integer of different size
> -Original Message- > From: Qiu, Michael > Sent: Tuesday, March 03, 2015 11:00 AM > To: Wodkowski, PawelX; dev at dpdk.org > Subject: Re: [dpdk-dev] [PATCH v2] librte_eal/common: Fix cast from pointer to > integer of different size > > On 3/3/2015 4:25 PM, Wodkowski, PawelX wrote: > > On 2015-03-03 03:20, Michael Qiu wrote: > >> /i686-native-linuxapp-gcc/include/rte_memcpy.h:592:23: error: > >> cast from pointer to integer of different size > >> [-Werror=pointer-to-int-cast] > >> > >>dstofss = 16 - (int)((long long)(void *)dst & 0x0F) + 16; > >> > >> Type 'long long' is 64-bit in i686 platform while 'void *' > >> is 32-bit. > >> > >> Signed-off-by: Michael Qiu > >> --- > >> v2 --> v1: > >>Remove unnecessary casting (void *) > >> > >> lib/librte_eal/common/include/arch/x86/rte_memcpy.h | 4 ++-- > >> 1 file changed, 2 insertions(+), 2 deletions(-) > >> > >> diff --git a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h > b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h > >> index 7b2d382..85a5f4d 100644 > >> --- a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h > >> +++ b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h > >> @@ -589,12 +589,12 @@ COPY_BLOCK_64_BACK15: > >> * unaligned copy functions require up to 15 bytes > >> * backwards access. > >> */ > >> - dstofss = 16 - (int)((long long)(void *)dst & 0x0F) + 16; > >> + dstofss = 16 - (int)((long)dst & 0x0F) + 16; > >>n -= dstofss; > >>rte_mov32((uint8_t *)dst, (const uint8_t *)src); > >>src = (const uint8_t *)src + dstofss; > >>dst = (uint8_t *)dst + dstofss; > >> - srcofs = (int)((long long)(const void *)src & 0x0F); > >> + srcofs = (int)((long)src & 0x0F); > >> > >>/** > >> * For aligned copy > >> > > I think you should use here size_t, (u)ptrdiff_t or (u)intptr_t as this > > Yes, but 'long' is enough, does it limit anything, or has any issue with > multiple platforms? > Those types ((u)ptrdiff_t, (u)intptr_t) exists specially for pointer-to-int and int-to-pointer casts. 
Using plain integer types might produce further warnings/errors in the future and require further patches fixing the same place. Also, why make the offset variables signed and of a different type? This introduces a lot of unnecessary explicit and implicit casts or type promotions. > > will be more portable. > > Also type of dstofss and srcofs should be changed accordingly. > > No, I think, it should be offset. > > Thanks, > Michael > > -- Pawel
[dpdk-dev] [PATCH v2] librte_eal/common: Fix cast from pointer to integer of different size
On 2015-03-04 02:58, Qiu, Michael wrote: > On 3/3/2015 9:38 PM, Wodkowski, PawelX wrote: >>> -Original Message- >>> From: Qiu, Michael >>> Sent: Tuesday, March 03, 2015 11:00 AM >>> To: Wodkowski, PawelX; dev at dpdk.org >>> Subject: Re: [dpdk-dev] [PATCH v2] librte_eal/common: Fix cast from pointer >>> to >>> integer of different size >>> >>> On 3/3/2015 4:25 PM, Wodkowski, PawelX wrote: On 2015-03-03 03:20, Michael Qiu wrote: > /i686-native-linuxapp-gcc/include/rte_memcpy.h:592:23: error: > cast from pointer to integer of different size > [-Werror=pointer-to-int-cast] > > dstofss = 16 - (int)((long long)(void *)dst & 0x0F) + 16; > > Type 'long long' is 64-bit in i686 platform while 'void *' > is 32-bit. > > Signed-off-by: Michael Qiu > --- > v2 --> v1: > Remove unnecessary casting (void *) > >lib/librte_eal/common/include/arch/x86/rte_memcpy.h | 4 ++-- >1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h >>> b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h > index 7b2d382..85a5f4d 100644 > --- a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h > +++ b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h > @@ -589,12 +589,12 @@ COPY_BLOCK_64_BACK15: >* unaligned copy functions require up to 15 bytes >* backwards access. >*/ > - dstofss = 16 - (int)((long long)(void *)dst & 0x0F) + 16; > + dstofss = 16 - (int)((long)dst & 0x0F) + 16; > n -= dstofss; > rte_mov32((uint8_t *)dst, (const uint8_t *)src); > src = (const uint8_t *)src + dstofss; > dst = (uint8_t *)dst + dstofss; > - srcofs = (int)((long long)(const void *)src & 0x0F); > + srcofs = (int)((long)src & 0x0F); > > /** >* For aligned copy > I think you should use here size_t, (u)ptrdiff_t or (u)intptr_t as this >>> Yes, but 'long' is enough, does it limit anything, or has any issue with >>> multiple platforms? >>> >> Those types ((u)ptrdiff_t, (u)intptr_t) exists specially for >> pointer-to-int and int-to-pointer casts. 
Using integer primitives might >> produce further warnings/error in the future and need further patches >> fixing the same place. > > OK, I will send out the v3 patch. > >> Also why make offset variables signed and different type? This introduce >> a lot of unnecessary explicit and implicit casts or type promotions. > > But Is it suitable to make offset (u)ptrdiff_t or (u)intptr_t? > I think, as final result is offset, its type should be size_t (the same type as type of offsetof() macro). This way you can use uptrdiff_t/uintptr_t and does not need of any signed-unsigned casting. > Thanks, > Michael > will be more portable. Also type of dstofss and srcofs should be changed accordingly. >>> No, I think, it should be offset. >>> >>> Thanks, >>> Michael >> > > -- Pawel
[dpdk-dev] [PATCH v5 1/3] ixgbe: Cleanups
On 2015-03-09 11:49, Ananyev, Konstantin wrote: > > >> -Original Message- >> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Vlad Zolotarov >> Sent: Monday, March 09, 2015 10:21 AM >> To: dev at dpdk.org >> Subject: [dpdk-dev] [PATCH v5 1/3] ixgbe: Cleanups >> >> - Removed the not needed casting. >> - ixgbe_dev_rx_init(): shorten the lines by defining a local alias >> variable to access >>&dev->data->dev_conf.rxmode. >> >> Signed-off-by: Vlad Zolotarov >> --- >> lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 27 --- >> 1 file changed, 12 insertions(+), 15 deletions(-) >> >> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c >> b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c >> index 72c65df..609b5fd 100644 >> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c >> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c >> @@ -1032,8 +1032,7 @@ ixgbe_rx_alloc_bufs(struct igb_rx_queue *rxq) >> int diag, i; >> >> /* allocate buffers in bulk directly into the S/W ring */ >> -alloc_idx = (uint16_t)(rxq->rx_free_trigger - >> -(rxq->rx_free_thresh - 1)); >> +alloc_idx = rxq->rx_free_trigger - (rxq->rx_free_thresh - 1); > > I think all these extra casts came in to keep icc 12.* compiling without > warnings. > I am agree that they are unnecessary. > Though if we still have to support icc 12.* we either need to keep them, or > find > some other way to keep it happy. > Konstantin > What warnings icc 12.* is throwing? Only warning I can think of here is signed -> unsigned implicit cast. Changing '1' to '1U' helps? -- Pawel
[dpdk-dev] [PATCH 1/2] example: fix minor bug in l2fwd-jobstats init sequence
Fix check of returned values during application init phase. Signed-off-by: Pawel Wodkowski --- examples/l2fwd-jobstats/main.c | 13 + 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/examples/l2fwd-jobstats/main.c b/examples/l2fwd-jobstats/main.c index a5a1aaa..ba9d3c0 100644 --- a/examples/l2fwd-jobstats/main.c +++ b/examples/l2fwd-jobstats/main.c @@ -997,11 +997,11 @@ main(int argc, char **argv) drain_tsc, 0); rte_timer_init(&qconf->flush_timer); - rte_timer_reset(&qconf->flush_timer, drain_tsc, PERIODICAL, lcore_id, - &l2fwd_flush_job, NULL); + ret = rte_timer_reset(&qconf->flush_timer, drain_tsc, PERIODICAL, + lcore_id, &l2fwd_flush_job, NULL); if (ret < 0) { - rte_exit(1, "Failed to add flush job for lcore %u: %s", + rte_exit(1, "Failed to reset flush job timer for lcore %u: %s", lcore_id, rte_strerror(-ret)); } @@ -1019,8 +1019,13 @@ main(int argc, char **argv) rte_jobstats_set_update_period_function(job, l2fwd_job_update_cb); rte_timer_init(&qconf->rx_timers[i]); - rte_timer_reset(&qconf->rx_timers[i], 0, PERIODICAL, lcore_id, + ret = rte_timer_reset(&qconf->rx_timers[i], 0, PERIODICAL, lcore_id, &l2fwd_fwd_job, (void *)(uintptr_t)i); + + if (ret < 0) { + rte_exit(1, "Failed to reset lcore %u port %u job timer: %s", + lcore_id, qconf->rx_port_list[i], rte_strerror(-ret)); + } } } -- 1.9.1
[dpdk-dev] [PATCH 1/2] example: fix minor bug in l2fwd-jobstats init sequence
On 2015-03-09 17:05, Pawel Wodkowski wrote: > [PATCH 1/2] This is a standalone patch. The '1/2' was generated by accident. -- Pawel
[dpdk-dev] [PATCH v6] af_packet: Fix some klocwork errors
> - > - return 0; > +exit: > + if (kvlist != NULL) No need for if(). This part was fine in the previous patch. > + rte_kvargs_free(kvlist); > + return ret; > } > > static struct rte_driver pmd_af_packet_drv = { > -- Pawel
[dpdk-dev] [PATCH v6] af_packet: Fix some klocwork errors
On 2015-03-10 09:49, Ouyang, Changchun wrote: > > >> -Original Message- >> From: Wodkowski, PawelX >> Sent: Tuesday, March 10, 2015 4:37 PM >> To: Ouyang, Changchun; dev at dpdk.org >> Cc: linville at tuxdriver.com; nhorman at tuxdriver.com >> Subject: Re: [PATCH v6] af_packet: Fix some klocwork errors >> >>> - >>> - return 0; >>> +exit: >>> + if (kvlist != NULL) >> >> No need for if(). This part was fine previous patch. >> > > If kvlist is NULL, no reason to call rte_kvargs_free to free it. > So, adding this test is better. For programmer convenience, and to reduce code bloat/obfuscation, the same test is already in rte_kvargs_free() (and every other free-like function). Unless there is a particular reason for it (e.g. performance, which does not matter on this path), checking a pointer for NULL before freeing it should be avoided. > >>> + rte_kvargs_free(kvlist); >>> + return ret; >>>} >>> >>>static struct rte_driver pmd_af_packet_drv = { >>> >> >> >> -- >> Pawel -- Pawel
[dpdk-dev] [PATCH] doc: add l2fwd-jobstats user guide
Signed-off-by: Pawel Wodkowski --- doc/guides/sample_app_ug/index.rst| 1 + doc/guides/sample_app_ug/l2_forward_job_stats.rst | 637 ++ 2 files changed, 638 insertions(+) create mode 100644 doc/guides/sample_app_ug/l2_forward_job_stats.rst diff --git a/doc/guides/sample_app_ug/index.rst b/doc/guides/sample_app_ug/index.rst index 5720181..c89a2f0 100644 --- a/doc/guides/sample_app_ug/index.rst +++ b/doc/guides/sample_app_ug/index.rst @@ -47,6 +47,7 @@ Sample Applications User Guide ipv4_multicast ip_reassembly kernel_nic_interface +l2_forward_job_stats l2_forward_real_virtual l3_forward l3_forward_power_man diff --git a/doc/guides/sample_app_ug/l2_forward_job_stats.rst b/doc/guides/sample_app_ug/l2_forward_job_stats.rst new file mode 100644 index 000..76cea71 --- /dev/null +++ b/doc/guides/sample_app_ug/l2_forward_job_stats.rst @@ -0,0 +1,637 @@ +.. BSD LICENSE +Copyright(c) 2010-2015 Intel Corporation. All rights reserved. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +* Neither the name of Intel Corporation nor the names of its +contributors may be used to endorse or promote products derived +from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +L2 Forwarding Sample Application (in Real and Virtualized Environments) with core load statistics. +== + +The L2 Forwarding sample application is a simple example of packet processing using +the Data Plane Development Kit (DPDK) which +also takes advantage of Single Root I/O Virtualization (SR-IOV) features in a virtualized environment. + +.. note:: + +This application is a variation of L2 Forwarding sample application. It demonstrate possible +scheme of job stats library usage therefore some parts of this document is identical with original +L2 forwarding application. + +Overview + + +The L2 Forwarding sample application, which can operate in real and virtualized environments, +performs L2 forwarding for each packet that is received. +The destination port is the adjacent port from the enabled portmask, that is, +if the first four ports are enabled (portmask 0xf), +ports 1 and 2 forward into each other, and ports 3 and 4 forward into each other. +Also, the MAC addresses are affected as follows: + +* The source MAC address is replaced by the TX port MAC address + +* The destination MAC address is replaced by 02:00:00:00:00:TX_PORT_ID + +This application can be used to benchmark performance using a traffic-generator, as shown in the Figure 3. + +The application can also be used in a virtualized environment as shown in Figure 4. + +The L2 Forwarding application can also be used as a starting point for developing a new application based on the DPDK. + +.. 
_figure_3: + +**Figure 3. Performance Benchmark Setup (Basic Environment)** + +.. image4_png has been replaced + +|l2_fwd_benchmark_setup| + +.. _figure_4: + +**Figure 4. Performance Benchmark Setup (Virtualized Environment)** + +.. image5_png has been renamed + +|l2_fwd_virtenv_benchmark_setup| + +Virtual Function Setup Instructions +~~~ + +This application can use the virtual function available in the system and +therefore can be used in a virtual machine without passing through +the whole Network Device into a guest machine in a virtualized scenario. +The virtual functions can be enabled in the host machine or the hypervisor with the respective physical function driver. + +For example, in a Linux* host machine, it is possible to enable
[dpdk-dev] [PATCH v2] doc: add l2fwd-jobstats user guide
Signed-off-by: Pawel Wodkowski --- Changes v2 1. Fix trailing spaces and typos. 2. Add maintaners claim MAINTAINERS | 1 + doc/guides/sample_app_ug/index.rst| 1 + doc/guides/sample_app_ug/l2_forward_job_stats.rst | 637 ++ 3 files changed, 639 insertions(+) create mode 100644 doc/guides/sample_app_ug/l2_forward_job_stats.rst diff --git a/MAINTAINERS b/MAINTAINERS index 07fdf5e..a82e2f0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -391,6 +391,7 @@ Job statistics M: Pawel Wodkowski F: lib/librte_jobstats/ F: examples/l2fwd-jobstats/ +F: doc/guides/sample_app_ug/l2_forward_job_stats.rst Test Applications diff --git a/doc/guides/sample_app_ug/index.rst b/doc/guides/sample_app_ug/index.rst index 5720181..c89a2f0 100644 --- a/doc/guides/sample_app_ug/index.rst +++ b/doc/guides/sample_app_ug/index.rst @@ -47,6 +47,7 @@ Sample Applications User Guide ipv4_multicast ip_reassembly kernel_nic_interface +l2_forward_job_stats l2_forward_real_virtual l3_forward l3_forward_power_man diff --git a/doc/guides/sample_app_ug/l2_forward_job_stats.rst b/doc/guides/sample_app_ug/l2_forward_job_stats.rst new file mode 100644 index 000..e25d7b1 --- /dev/null +++ b/doc/guides/sample_app_ug/l2_forward_job_stats.rst @@ -0,0 +1,637 @@ +.. BSD LICENSE +Copyright(c) 2010-2015 Intel Corporation. All rights reserved. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. 
+* Neither the name of Intel Corporation nor the names of its +contributors may be used to endorse or promote products derived +from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +L2 Forwarding Sample Application (in Real and Virtualized Environments) with core load statistics. +== + +The L2 Forwarding sample application is a simple example of packet processing using +the Data Plane Development Kit (DPDK) which +also takes advantage of Single Root I/O Virtualization (SR-IOV) features in a virtualized environment. + +.. note:: + +This application is a variation of L2 Forwarding sample application. It demonstrate possible +scheme of job stats library usage therefore some parts of this document is identical with original +L2 forwarding application. + +Overview + + +The L2 Forwarding sample application, which can operate in real and virtualized environments, +performs L2 forwarding for each packet that is received. +The destination port is the adjacent port from the enabled portmask, that is, +if the first four ports are enabled (portmask 0xf), +ports 1 and 2 forward into each other, and ports 3 and 4 forward into each other. 
+Also, the MAC addresses are affected as follows: + +* The source MAC address is replaced by the TX port MAC address + +* The destination MAC address is replaced by 02:00:00:00:00:TX_PORT_ID + +This application can be used to benchmark performance using a traffic-generator, as shown in the Figure 3. + +The application can also be used in a virtualized environment as shown in Figure 4. + +The L2 Forwarding application can also be used as a starting point for developing a new application based on the DPDK. + +.. _figure_3: + +**Figure 3. Performance Benchmark Setup (Basic Environment)** + +.. image4_png has been replaced + +|l2_fwd_benchmark_setup| + +.. _figure_4: + +**Figure 4. Performance Benchmark Setup (Virtualized Environment)** + +.. image5_png has been renamed + +|l2_fwd_virtenv_benchmark_setup| + +Virtual Function Setup In
[dpdk-dev] [PATCH] ixgbe: fix buffer overrun bug in non-bulk alloc mode setup
From: Pawel Wodkowski When bulk alloc is enabled at compile time but its preconditions are not met at runtime, the ixgbe_reset_rx_queue() function writes to rxq->sw_ring elements that were not allocated. Fixes: 01fa1d6 ("ixgbe: unify Rx setup") Signed-off-by: Pawel Wodkowski --- lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c index 42f0aa5..dddc12f 100644 --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c @@ -2111,8 +2111,8 @@ ixgbe_reset_rx_queue(struct ixgbe_hw *hw, struct ixgbe_rx_queue *rxq) * entries is always allocated */ memset(&rxq->fake_mbuf, 0x0, sizeof(rxq->fake_mbuf)); - for (i = 0; i < RTE_PMD_IXGBE_RX_MAX_BURST; ++i) { - rxq->sw_ring[rxq->nb_rx_desc + i].mbuf = &rxq->fake_mbuf; + for (i = rxq->nb_rx_desc; i < len; ++i) { + rxq->sw_ring[i].mbuf = &rxq->fake_mbuf; } rxq->rx_nb_avail = 0; -- 1.9.1
[dpdk-dev] [PATCH 0/2] doc: update release notes for jobstats and bonding mode 6
Update release notes for jobstats and bonding mode 6. Pawel Wodkowski (2): doc: update bonding mode 6 release notes doc: add jobstats library and application release notes doc/guides/rel_notes/new_features.rst | 4 +++- doc/guides/rel_notes/supported_features.rst | 9 - 2 files changed, 11 insertions(+), 2 deletions(-) -- 1.9.1
[dpdk-dev] [PATCH 1/2] doc: update bonding mode 6 release notes
Signed-off-by: Pawel Wodkowski --- doc/guides/rel_notes/new_features.rst | 2 +- doc/guides/rel_notes/supported_features.rst | 5 - 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/doc/guides/rel_notes/new_features.rst b/doc/guides/rel_notes/new_features.rst index a27e360..b8d4ea4 100644 --- a/doc/guides/rel_notes/new_features.rst +++ b/doc/guides/rel_notes/new_features.rst @@ -32,7 +32,7 @@ New Features * Link Bonding -* Support for 802.3ad link aggregation (mode 4) and transmit load balancing (mode 5) to the link bonding library. +* Support for adaptive load balancing (mode 6) to the link bonding library. * Support for registration of link status change callbacks with link bonding devices. diff --git a/doc/guides/rel_notes/supported_features.rst b/doc/guides/rel_notes/supported_features.rst index d87fcaa..e8785b7 100644 --- a/doc/guides/rel_notes/supported_features.rst +++ b/doc/guides/rel_notes/supported_features.rst @@ -39,7 +39,8 @@ Supported Features * Support for VFIO for mapping BARs and setting up interrupts -* Link Bonding PMD Library supporting round-robin, active backup, balance(layer 2, layer 2+3, and layer 3+4) and broadcast bonding modes +* Link Bonding PMD Library supporting round-robin, active backup, balance(layer 2, layer 2+3, and layer 3+4), broadcast bonding modes +802.3ad link aggregation (mode 4), transmit load balancing (mode 5) and adaptive load balancing (mode 6) * Support zero copy mode RX/TX in user space vhost sample @@ -313,6 +314,8 @@ Supported Features * L3 Forwarding with Power Management +* Bonding mode 6 + * QoS Scheduling * QoS Metering + Dropper -- 1.9.1
[dpdk-dev] [PATCH 2/2] doc: add jobstats library and application release notes
Signed-off-by: Pawel Wodkowski --- doc/guides/rel_notes/new_features.rst | 2 ++ doc/guides/rel_notes/supported_features.rst | 4 2 files changed, 6 insertions(+) diff --git a/doc/guides/rel_notes/new_features.rst b/doc/guides/rel_notes/new_features.rst index b8d4ea4..6acfe0c 100644 --- a/doc/guides/rel_notes/new_features.rst +++ b/doc/guides/rel_notes/new_features.rst @@ -58,6 +58,8 @@ New Features * Packet Distributor Sample Application +* Job Stats library and Sample Application. + * Poll Mode Driver - PCIE host-interface of Intel Ethernet Switch FM1 Series (librte_pmd_fm10k) * Basic Rx/Tx functions for PF/VF diff --git a/doc/guides/rel_notes/supported_features.rst b/doc/guides/rel_notes/supported_features.rst index e8785b7..c908877 100644 --- a/doc/guides/rel_notes/supported_features.rst +++ b/doc/guides/rel_notes/supported_features.rst @@ -308,6 +308,8 @@ Supported Features * L2 Forwarding (supports virtualized and non-virtualized environments) +* L2 Forwarding Job Stats + * L3 Forwarding (IPv4 and IPv6) * L3 Forwarding in a Virtualized Environment @@ -370,6 +372,8 @@ Supported Features * CPU-specific compiler optimization +* Job stats library for load/cpu utilization measurements + * Improvements to the Load Balancing sample application * The addition of a PAUSE instruction to tight loops for energy-usage and performance improvements -- 1.9.1
[dpdk-dev] [PATCH] hash: fix breaking strict-aliasing rules
On 2015-03-18 17:51, Yerden Zhumabekov wrote: > Fix rte_hash_crc() function. Casting uint64_t pointer to uint32_t > may trigger a compiler warning about breaking strict-aliasing rules. > To avoid that, introduce a lookup table which is used to mask out > a remainder of data. > > See issue #1, http://dpdk.org/ml/archives/dev/2015-March/015174.html > > Signed-off-by: Yerden Zhumabekov > --- > lib/librte_hash/rte_hash_crc.h | 31 +++ > 1 file changed, 15 insertions(+), 16 deletions(-) > > diff --git a/lib/librte_hash/rte_hash_crc.h b/lib/librte_hash/rte_hash_crc.h > index 3dcd362..e81920f 100644 > --- a/lib/librte_hash/rte_hash_crc.h > +++ b/lib/librte_hash/rte_hash_crc.h > @@ -323,6 +323,16 @@ static const uint32_t crc32c_tables[8][256] = {{ >0xE54C35A1, 0xAC704886, 0x7734CFEF, 0x3E08B2C8, 0xC451B7CC, 0x8D6DCAEB, > 0x56294D82, 0x1F1530A5 > }}; > > +static const uint64_t odd_8byte_mask[] = { > + 0x00FFFFFFFFFFFFFF, > + 0x0000FFFFFFFFFFFF, > + 0x000000FFFFFFFFFF, > + 0x00000000FFFFFFFF, > + 0x0000000000FFFFFF, > + 0x000000000000FFFF, > + 0x00000000000000FF, > +}; > + > #define CRC32_UPD(crc, n) \ > (crc32c_tables[(n)][(crc) & 0xFF] ^ \ >crc32c_tables[(n)-1][((crc) >> 8) & 0xFF]) > @@ -535,38 +545,27 @@ static inline uint32_t > rte_hash_crc(const void *data, uint32_t data_len, uint32_t init_val) > { > unsigned i; > - uint64_t temp = 0; > + uint64_t temp; > const uint64_t *p64 = (const uint64_t *)data; > > for (i = 0; i < data_len / 8; i++) { > init_val = rte_hash_crc_8byte(*p64++, init_val); > } > > - switch (7 - (data_len & 0x07)) { > + i = 7 - (data_len & 0x07); Great idea with the lookup table! Only one question here: why keep this magic at all now? 
If you sort odd_8byte_mask in the opposite direction, you can do something like this: data_len &= 0x07; switch (data_len & 0x07) { case 1: case 2: case 3: case 4: temp = odd_8byte_mask[data_len] & *p64; init_val = rte_hash_crc_4byte(temp, init_val); case 5: case 6: case 7: temp = odd_8byte_mask[data_len] & *p64; init_val = rte_hash_crc_8byte(temp, init_val); break; default: break; } Or data_len &= 0x07; if (data_len > 0) { temp = odd_8byte_mask[data_len] & *p64; if (data_len <= 4) init_val = rte_hash_crc_4byte(temp, init_val); else init_val = rte_hash_crc_8byte(temp, init_val); } Is there something obvious that I am not seeing here? Pawel > + switch (i) { > case 0: > - temp |= (uint64_t) *((const uint8_t *)p64 + 6) << 48; > - /* Fallthrough */ > case 1: > - temp |= (uint64_t) *((const uint8_t *)p64 + 5) << 40; > - /* Fallthrough */ > case 2: > - temp |= (uint64_t) *((const uint8_t *)p64 + 4) << 32; > - temp |= *((const uint32_t *)p64); > + temp = odd_8byte_mask[i] & *p64; > init_val = rte_hash_crc_8byte(temp, init_val); > break; > case 3: > - init_val = rte_hash_crc_4byte(*(const uint32_t *)p64, init_val); > - break; > case 4: > - temp |= *((const uint8_t *)p64 + 2) << 16; > - /* Fallthrough */ > case 5: > - temp |= *((const uint8_t *)p64 + 1) << 8; > - /* Fallthrough */ > case 6: > - temp |= *((const uint8_t *)p64); > + temp = odd_8byte_mask[i] & *p64; > init_val = rte_hash_crc_4byte(temp, init_val); > - /* Fallthrough */ > default: > break; > } > -- Pawel
[dpdk-dev] [PATCH] hash: fix breaking strict-aliasing rules
On 2015-03-18 17:51, Yerden Zhumabekov wrote: > > - switch (7 - (data_len & 0x07)) { > + i = 7 - (data_len & 0x07); > + switch (i) { > case 0: > - temp |= (uint64_t) *((const uint8_t *)p64 + 6) << 48; > - /* Fallthrough */ > case 1: > - temp |= (uint64_t) *((const uint8_t *)p64 + 5) << 40; > - /* Fallthrough */ > case 2: > - temp |= (uint64_t) *((const uint8_t *)p64 + 4) << 32; > - temp |= *((const uint32_t *)p64); > + temp = odd_8byte_mask[i] & *p64; > init_val = rte_hash_crc_8byte(temp, init_val); > break; > case 3: > - init_val = rte_hash_crc_4byte(*(const uint32_t *)p64, init_val); > - break; > case 4: > - temp |= *((const uint8_t *)p64 + 2) << 16; > - /* Fallthrough */ > case 5: > - temp |= *((const uint8_t *)p64 + 1) << 8; > - /* Fallthrough */ > case 6: > - temp |= *((const uint8_t *)p64); > + temp = odd_8byte_mask[i] & *p64; > init_val = rte_hash_crc_4byte(temp, init_val); > - /* Fallthrough */ > default: > break; > } > Second thought: isn't it a problem that dereferencing *p64 reads a full 8 bytes even when fewer bytes remain in the buffer? -- Pawel