[dpdk-dev] [PATCH 0/3] add i40e RSS support in VF
As RSS in i40e VF is supported by hardware, these patches enable it in i40e PMD, and also enable its testing in testpmd. Helin Zhang (3): i40evf: add RSS support in VF app/testpmd: enable RSS support for i40e ethdev: improvements for some macro definition in head file app/test-pmd/testpmd.c | 13 ++- lib/librte_ether/rte_ethdev.h| 47 lib/librte_pmd_i40e/i40e_ethdev.c| 4 +- lib/librte_pmd_i40e/i40e_ethdev.h| 40 ++- lib/librte_pmd_i40e/i40e_ethdev_vf.c | 208 +++ 5 files changed, 284 insertions(+), 28 deletions(-) -- 1.8.1.4
[dpdk-dev] [PATCH 1/3] i40evf: add RSS support in VF
Add VF RSS support in Poll Mode Driver, as it is supported by hardware. Signed-off-by: Helin Zhang Acked-by: Cunming Liang Acked-by: Jijiang Liu --- lib/librte_pmd_i40e/i40e_ethdev.c| 4 +- lib/librte_pmd_i40e/i40e_ethdev.h| 40 ++- lib/librte_pmd_i40e/i40e_ethdev_vf.c | 208 +++ 3 files changed, 249 insertions(+), 3 deletions(-) diff --git a/lib/librte_pmd_i40e/i40e_ethdev.c b/lib/librte_pmd_i40e/i40e_ethdev.c index 9ed31b5..85e8b18 100644 --- a/lib/librte_pmd_i40e/i40e_ethdev.c +++ b/lib/librte_pmd_i40e/i40e_ethdev.c @@ -3747,7 +3747,7 @@ DONE: } /* Configure hash enable flags for RSS */ -static uint64_t +uint64_t i40e_config_hena(uint64_t flags) { uint64_t hena = 0; @@ -3782,7 +3782,7 @@ i40e_config_hena(uint64_t flags) } /* Parse the hash enable flags */ -static uint64_t +uint64_t i40e_parse_hena(uint64_t flags) { uint64_t rss_hf = 0; diff --git a/lib/librte_pmd_i40e/i40e_ethdev.h b/lib/librte_pmd_i40e/i40e_ethdev.h index 64deef2..1d42cd2 100644 --- a/lib/librte_pmd_i40e/i40e_ethdev.h +++ b/lib/librte_pmd_i40e/i40e_ethdev.h @@ -68,6 +68,36 @@ I40E_FLAG_HEADER_SPLIT_ENABLED | \ I40E_FLAG_FDIR) +#define I40E_RSS_OFFLOAD_ALL ( \ + ETH_RSS_NONF_IPV4_UDP | \ + ETH_RSS_NONF_IPV4_TCP | \ + ETH_RSS_NONF_IPV4_SCTP | \ + ETH_RSS_NONF_IPV4_OTHER | \ + ETH_RSS_FRAG_IPV4 | \ + ETH_RSS_NONF_IPV6_UDP | \ + ETH_RSS_NONF_IPV6_TCP | \ + ETH_RSS_NONF_IPV6_SCTP | \ + ETH_RSS_NONF_IPV6_OTHER | \ + ETH_RSS_FRAG_IPV6 | \ + ETH_RSS_L2_PAYLOAD) + +/* All bits of RSS hash enable */ +#define I40E_RSS_HENA_ALL ( \ + (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_UDP) | \ + (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP) | \ + (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_SCTP) | \ + (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_OTHER) | \ + (1ULL << I40E_FILTER_PCTYPE_FRAG_IPV4) | \ + (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_UDP) | \ + (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP) | \ + (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_SCTP) | \ + (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_OTHER) | \ + (1ULL << I40E_FILTER_PCTYPE_FRAG_IPV6) | 
\ + (1ULL << I40E_FILTER_PCTYPE_FCOE_OX) | \ + (1ULL << I40E_FILTER_PCTYPE_FCOE_RX) | \ + (1ULL << I40E_FILTER_PCTYPE_FCOE_OTHER) | \ + (1ULL << I40E_FILTER_PCTYPE_L2_PAYLOAD)) + struct i40e_adapter; TAILQ_HEAD(i40e_mac_filter_list, i40e_mac_filter); @@ -253,6 +283,8 @@ struct i40e_vf_tx_queues { * Structure to store private data specific for VF instance. */ struct i40e_vf { + struct i40e_adapter *adapter; /* The adapter this VF associate to */ + struct rte_eth_dev_data *dev_data; /* Pointer to the device data */ uint16_t num_queue_pairs; uint16_t max_pkt_len; /* Maximum packet length */ bool promisc_unicast_enabled; @@ -310,8 +342,10 @@ int i40e_dev_link_update(struct rte_eth_dev *dev, void i40e_vsi_queues_bind_intr(struct i40e_vsi *vsi); void i40e_vsi_queues_unbind_intr(struct i40e_vsi *vsi); int i40e_vsi_vlan_pvid_set(struct i40e_vsi *vsi, - struct i40e_vsi_vlan_pvid_info *info); + struct i40e_vsi_vlan_pvid_info *info); int i40e_vsi_config_vlan_stripping(struct i40e_vsi *vsi, bool on); +uint64_t i40e_config_hena(uint64_t flags); +uint64_t i40e_parse_hena(uint64_t flags); /* I40E_DEV_PRIVATE_TO */ #define I40E_DEV_PRIVATE_TO_PF(adapter) \ @@ -361,6 +395,10 @@ i40e_get_vsi_from_adapter(struct i40e_adapter *adapter) #define I40E_PF_TO_ADAPTER(pf) \ ((struct i40e_adapter *)pf->adapter) +/* I40E_VF_TO */ +#define I40E_VF_TO_HW(vf) \ + (&(((struct i40e_vf *)vf)->adapter->hw)) + static inline void i40e_init_adminq_parameter(struct i40e_hw *hw) { diff --git a/lib/librte_pmd_i40e/i40e_ethdev_vf.c b/lib/librte_pmd_i40e/i40e_ethdev_vf.c index 2726bfb..bef34cb 100644 --- a/lib/librte_pmd_i40e/i40e_ethdev_vf.c +++ b/lib/librte_pmd_i40e/i40e_ethdev_vf.c @@ -125,6 +125,19 @@ static void i40evf_dev_allmulticast_disable(struct rte_eth_dev *dev); static int i40evf_get_link_status(struct rte_eth_dev *dev, struct rte_eth_link *link); static int i40evf_init_vlan(struct rte_eth_dev *dev); +static int i40evf_config_rss(struct i40e_vf *vf); +static int i40evf_dev_rss_reta_update(struct 
rte_eth_dev *dev, + struct rte_eth_rss_reta *reta_conf); +static int i40evf_dev_rss_reta_query(struct rte_eth_dev *dev, +struct rte_eth_rss_reta *reta_conf); +static int i40evf_dev_rss_hash_update(struct rte_eth_dev *dev, + struct rte_eth_rss_conf *rss_conf); +static int i40evf_dev_rss_hash_conf_get(struct rte_eth_dev *dev, + struct rte_eth_rss_conf *rss_conf); + +/* Default hash key buffer for RSS */ +stat
[dpdk-dev] [PATCH 2/3] app/testpmd: enable RSS support for i40e
i40e can support RSS no matter if SR-IOV is enabled or not, while ixgbe/igb can not support RSS if it is SR-IOV. Code changes are needed to support i40e RSS if SR-IOV is enabled. Signed-off-by: Helin Zhang Acked-by: Jijiang Liu Acked-by: Cunming Liang --- app/test-pmd/testpmd.c | 13 +++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index e8a4b45..2a2ec76 100644 --- a/app/test-pmd/testpmd.c +++ b/app/test-pmd/testpmd.c @@ -1684,8 +1684,17 @@ init_port_config(void) port->dev_conf.rx_adv_conf.rss_conf.rss_hf = 0; } - /* In SR-IOV mode, RSS mode is not available */ - if (port->dcb_flag == 0 && port->dev_info.max_vfs == 0) { + /** +* For i40e, RSS is always available. +* For non-i40e, RSS is available in non-SRIOV mode, +* according to datasheet. +*/ + if (port->dcb_flag == 0 && + ((port->dev_info.max_vfs == 0) || + (!strcmp(port->dev_info.driver_name, + "rte_i40e_pmd")) || + (!strcmp(port->dev_info.driver_name, + "rte_i40evf_pmd" { if( port->dev_conf.rx_adv_conf.rss_conf.rss_hf != 0) port->dev_conf.rxmode.mq_mode = ETH_MQ_RX_RSS; else -- 1.8.1.4
[dpdk-dev] [PATCH 3/3] ethdev: improvements for some macro definition in head file
improvements for some macro definition about RSS packet classification types in rte_ethdev.h. Signed-off-by: Helin Zhang Acked-by: Cunming Liang Acked-by: Jijiang Liu --- lib/librte_ether/rte_ethdev.h | 47 ++- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h index 50df654..3a0b33b 100644 --- a/lib/librte_ether/rte_ethdev.h +++ b/lib/librte_ether/rte_ethdev.h @@ -362,30 +362,31 @@ struct rte_eth_rss_conf { #define ETH_RSS_L2_PAYLOAD_SHIFT 63 /* for 1G & 10G */ -#define ETH_RSS_IPV4((uint16_t)1 << ETH_RSS_IPV4_SHIFT) -#define ETH_RSS_IPV4_TCP((uint16_t)1 << ETH_RSS_IPV4_TCP_SHIFT) -#define ETH_RSS_IPV6((uint16_t)1 << ETH_RSS_IPV6_SHIFT) -#define ETH_RSS_IPV6_EX ((uint16_t)1 << ETH_RSS_IPV6_EX_SHIFT) -#define ETH_RSS_IPV6_TCP((uint16_t)1 << ETH_RSS_IPV6_TCP_SHIFT) -#define ETH_RSS_IPV6_TCP_EX ((uint16_t)1 << ETH_RSS_IPV6_TCP_EX_SHIFT) -#define ETH_RSS_IPV4_UDP((uint16_t)1 << ETH_RSS_IPV4_UDP_SHIFT) -#define ETH_RSS_IPV6_UDP((uint16_t)1 << ETH_RSS_IPV6_UDP_SHIFT) -#define ETH_RSS_IPV6_UDP_EX ((uint16_t)1 << ETH_RSS_IPV6_UDP_EX_SHIFT) +#define ETH_RSS_IPV4(1 << ETH_RSS_IPV4_SHIFT) +#define ETH_RSS_IPV4_TCP(1 << ETH_RSS_IPV4_TCP_SHIFT) +#define ETH_RSS_IPV6(1 << ETH_RSS_IPV6_SHIFT) +#define ETH_RSS_IPV6_EX (1 << ETH_RSS_IPV6_EX_SHIFT) +#define ETH_RSS_IPV6_TCP(1 << ETH_RSS_IPV6_TCP_SHIFT) +#define ETH_RSS_IPV6_TCP_EX (1 << ETH_RSS_IPV6_TCP_EX_SHIFT) +#define ETH_RSS_IPV4_UDP(1 << ETH_RSS_IPV4_UDP_SHIFT) +#define ETH_RSS_IPV6_UDP(1 << ETH_RSS_IPV6_UDP_SHIFT) +#define ETH_RSS_IPV6_UDP_EX (1 << ETH_RSS_IPV6_UDP_EX_SHIFT) /* for 40G only */ -#define ETH_RSS_NONF_IPV4_UDP ((uint64_t)1 << ETH_RSS_NONF_IPV4_UDP_SHIFT) -#define ETH_RSS_NONF_IPV4_TCP ((uint64_t)1 << ETH_RSS_NONF_IPV4_TCP_SHIFT) -#define ETH_RSS_NONF_IPV4_SCTP ((uint64_t)1 << ETH_RSS_NONF_IPV4_SCTP_SHIFT) -#define ETH_RSS_NONF_IPV4_OTHER ((uint64_t)1 << ETH_RSS_NONF_IPV4_OTHER_SHIFT) -#define ETH_RSS_FRAG_IPV4 ((uint64_t)1 << 
ETH_RSS_FRAG_IPV4_SHIFT) -#define ETH_RSS_NONF_IPV6_UDP ((uint64_t)1 << ETH_RSS_NONF_IPV6_UDP_SHIFT) -#define ETH_RSS_NONF_IPV6_TCP ((uint64_t)1 << ETH_RSS_NONF_IPV6_TCP_SHIFT) -#define ETH_RSS_NONF_IPV6_SCTP ((uint64_t)1 << ETH_RSS_NONF_IPV6_SCTP_SHIFT) -#define ETH_RSS_NONF_IPV6_OTHER ((uint64_t)1 << ETH_RSS_NONF_IPV6_OTHER_SHIFT) -#define ETH_RSS_FRAG_IPV6 ((uint64_t)1 << ETH_RSS_FRAG_IPV6_SHIFT) -#define ETH_RSS_FCOE_OX ((uint64_t)1 << ETH_RSS_FCOE_OX_SHIFT) /* not used */ -#define ETH_RSS_FCOE_RX ((uint64_t)1 << ETH_RSS_FCOE_RX_SHIFT) /* not used */ -#define ETH_RSS_FCOE_OTHER ((uint64_t)1 << ETH_RSS_FCOE_OTHER_SHIFT) /* not used */ -#define ETH_RSS_L2_PAYLOAD ((uint64_t)1 << ETH_RSS_L2_PAYLOAD_SHIFT) +#define ETH_RSS_NONF_IPV4_UDP (1ULL << ETH_RSS_NONF_IPV4_UDP_SHIFT) +#define ETH_RSS_NONF_IPV4_TCP (1ULL << ETH_RSS_NONF_IPV4_TCP_SHIFT) +#define ETH_RSS_NONF_IPV4_SCTP (1ULL << ETH_RSS_NONF_IPV4_SCTP_SHIFT) +#define ETH_RSS_NONF_IPV4_OTHER (1ULL << ETH_RSS_NONF_IPV4_OTHER_SHIFT) +#define ETH_RSS_FRAG_IPV4 (1ULL << ETH_RSS_FRAG_IPV4_SHIFT) +#define ETH_RSS_NONF_IPV6_UDP (1ULL << ETH_RSS_NONF_IPV6_UDP_SHIFT) +#define ETH_RSS_NONF_IPV6_TCP (1ULL << ETH_RSS_NONF_IPV6_TCP_SHIFT) +#define ETH_RSS_NONF_IPV6_SCTP (1ULL << ETH_RSS_NONF_IPV6_SCTP_SHIFT) +#define ETH_RSS_NONF_IPV6_OTHER (1ULL << ETH_RSS_NONF_IPV6_OTHER_SHIFT) +#define ETH_RSS_FRAG_IPV6 (1ULL << ETH_RSS_FRAG_IPV6_SHIFT) +/* FCOE relevant should not be used */ +#define ETH_RSS_FCOE_OX (1ULL << ETH_RSS_FCOE_OX_SHIFT) +#define ETH_RSS_FCOE_RX (1ULL << ETH_RSS_FCOE_RX_SHIFT) +#define ETH_RSS_FCOE_OTHER (1ULL << ETH_RSS_FCOE_OTHER_SHIFT) +#define ETH_RSS_L2_PAYLOAD (1ULL << ETH_RSS_L2_PAYLOAD_SHIFT) #define ETH_RSS_IP ( \ ETH_RSS_IPV4 | \ -- 1.8.1.4
[dpdk-dev] [PATCH v2] virtio: Fix 32bit pmd for 64bit kernel
*virtio_net_hdr_mem* member within *virtqueue* structure stores a physical address and is defined as void ptr. When 32bit pmd is used with 64bit kernel this leads to truncation of 64bit physical address and pkt i/o does not work. Changed *virtio_net_hdr_mem* to *phys_addr_t* type and removed the typecasts Signed-off-by: Vijayakumar Muthuvel Manickam --- lib/librte_pmd_virtio/virtio_ethdev.c |6 +++--- lib/librte_pmd_virtio/virtqueue.h |5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/lib/librte_pmd_virtio/virtio_ethdev.c b/lib/librte_pmd_virtio/virtio_ethdev.c index d4730f9..764a769 100644 --- a/lib/librte_pmd_virtio/virtio_ethdev.c +++ b/lib/librte_pmd_virtio/virtio_ethdev.c @@ -331,7 +331,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, PMD_INIT_LOG(DEBUG, "vq->vq_ring_mem: 0x%"PRIx64"\n", (uint64_t)mz->phys_addr); PMD_INIT_LOG(DEBUG, "vq->vq_ring_virt_mem: 0x%"PRIx64"\n", (uint64_t)mz->addr); vq->virtio_net_hdr_mz = NULL; - vq->virtio_net_hdr_mem = (void *)NULL; + vq->virtio_net_hdr_mem = 0; if (queue_type == VTNET_TQ) { /* @@ -347,7 +347,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, return -ENOMEM; } vq->virtio_net_hdr_mem = - (void *)(uintptr_t)vq->virtio_net_hdr_mz->phys_addr; + vq->virtio_net_hdr_mz->phys_addr; memset(vq->virtio_net_hdr_mz->addr, 0, vq_size * sizeof(struct virtio_net_hdr)); } else if (queue_type == VTNET_CQ) { @@ -361,7 +361,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, return -ENOMEM; } vq->virtio_net_hdr_mem = - (void *)(uintptr_t)vq->virtio_net_hdr_mz->phys_addr; + vq->virtio_net_hdr_mz->phys_addr; memset(vq->virtio_net_hdr_mz->addr, 0, PAGE_SIZE); } diff --git a/lib/librte_pmd_virtio/virtqueue.h b/lib/librte_pmd_virtio/virtqueue.h index a63723d..e244199 100644 --- a/lib/librte_pmd_virtio/virtqueue.h +++ b/lib/librte_pmd_virtio/virtqueue.h @@ -152,7 +152,7 @@ struct virtqueue { */ uint16_t vq_used_cons_idx; uint16_t vq_avail_idx; - void *virtio_net_hdr_mem; /**< hdr for each xmit packet */ 
+ phys_addr_t virtio_net_hdr_mem; /**< hdr for each xmit packet */ struct vq_desc_extra { void *cookie; @@ -354,7 +354,8 @@ virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie) dxp->ndescs = needed; start_dp = txvq->vq_ring.desc; - start_dp[idx].addr = (uint64_t)(uintptr_t)txvq->virtio_net_hdr_mem + idx * sizeof(struct virtio_net_hdr); + start_dp[idx].addr = + txvq->virtio_net_hdr_mem + idx * sizeof(struct virtio_net_hdr); start_dp[idx].len = sizeof(struct virtio_net_hdr); start_dp[idx].flags = VRING_DESC_F_NEXT; idx = start_dp[idx].next; -- 1.7.1
[dpdk-dev] [PATCH 1/2] lib/librte_vhost: vhost library support to facilitate integration with vswitch.
Signed-off-by: Huawei Xie Acked-by: Konstantin Ananyev Acked-by: Thomos Long --- config/common_linuxapp |6 + lib/Makefile |1 + lib/librte_vhost/Makefile| 48 ++ lib/librte_vhost/eventfd_link/Makefile | 39 + lib/librte_vhost/eventfd_link/eventfd_link.c | 205 ++ lib/librte_vhost/eventfd_link/eventfd_link.h | 79 ++ lib/librte_vhost/rte_virtio_net.h| 192 + lib/librte_vhost/vhost-net-cdev.c| 363 ++ lib/librte_vhost/vhost-net-cdev.h| 112 +++ lib/librte_vhost/vhost_rxtx.c| 292 lib/librte_vhost/virtio-net.c| 1002 ++ 11 files changed, 2339 insertions(+) create mode 100644 lib/librte_vhost/Makefile create mode 100644 lib/librte_vhost/eventfd_link/Makefile create mode 100644 lib/librte_vhost/eventfd_link/eventfd_link.c create mode 100644 lib/librte_vhost/eventfd_link/eventfd_link.h create mode 100644 lib/librte_vhost/rte_virtio_net.h create mode 100644 lib/librte_vhost/vhost-net-cdev.c create mode 100644 lib/librte_vhost/vhost-net-cdev.h create mode 100644 lib/librte_vhost/vhost_rxtx.c create mode 100644 lib/librte_vhost/virtio-net.c diff --git a/config/common_linuxapp b/config/common_linuxapp index 7bf5d80..002ed84 100644 --- a/config/common_linuxapp +++ b/config/common_linuxapp @@ -390,6 +390,12 @@ CONFIG_RTE_KNI_VHOST_DEBUG_RX=n CONFIG_RTE_KNI_VHOST_DEBUG_TX=n # +# Compile vhost library +# +CONFIG_RTE_LIBRTE_VHOST=y +CONFIG_RTE_LIBRTE_VHOST_DEBUG=n + +# #Compile Xen domain0 support # CONFIG_RTE_LIBRTE_XEN_DOM0=n diff --git a/lib/Makefile b/lib/Makefile index 10c5bb3..007c174 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -60,6 +60,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_METER) += librte_meter DIRS-$(CONFIG_RTE_LIBRTE_SCHED) += librte_sched DIRS-$(CONFIG_RTE_LIBRTE_KVARGS) += librte_kvargs DIRS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += librte_distributor +DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += librte_vhost DIRS-$(CONFIG_RTE_LIBRTE_PORT) += librte_port DIRS-$(CONFIG_RTE_LIBRTE_TABLE) += librte_table DIRS-$(CONFIG_RTE_LIBRTE_PIPELINE) += librte_pipeline diff --git a/lib/librte_vhost/Makefile 
b/lib/librte_vhost/Makefile new file mode 100644 index 000..f79778b --- /dev/null +++ b/lib/librte_vhost/Makefile @@ -0,0 +1,48 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_vhost.a + +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -D_FILE_OFFSET_BITS=64 -lfuse +LDFLAGS += -lfuse +# all source are stored in SRCS-y +SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost-net-cdev.c virtio-net.c vhost_rxtx.c + +# install includes +SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h + +# this lib needs eal +DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_eal lib/librte_mbuf + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_vhost/eventfd_link/Makefile b/lib/librte_vhost/eventfd_link/Makefile new file mode 100644 index 000..5fe7297 --- /dev/null +++ b/lib/librte_vhost/eventfd_link/Makefile @@ -0,0 +1,39 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, a
[dpdk-dev] [PATCH 0/2] user space vhost library
This user space vhost library is based off user space vhost example and aims to provide same API for different vhost implementations. This implementation includes user space vhost cuse driver, kernel module for eventfd proxy and vhost enqueue/dequeue functionalities. hxie5 (2): vhost library support to facilitate integration with switch. Turn off vhost_lib by default as it needs fuse, fuse-devel to compile config/common_linuxapp |7 + lib/Makefile |1 + lib/librte_vhost/Makefile| 48 ++ lib/librte_vhost/eventfd_link/Makefile | 39 + lib/librte_vhost/eventfd_link/eventfd_link.c | 205 ++ lib/librte_vhost/eventfd_link/eventfd_link.h | 79 ++ lib/librte_vhost/rte_virtio_net.h| 192 + lib/librte_vhost/vhost-net-cdev.c| 363 ++ lib/librte_vhost/vhost-net-cdev.h| 112 +++ lib/librte_vhost/vhost_rxtx.c| 292 lib/librte_vhost/virtio-net.c| 1002 ++ 11 files changed, 2340 insertions(+) create mode 100644 lib/librte_vhost/Makefile create mode 100644 lib/librte_vhost/eventfd_link/Makefile create mode 100644 lib/librte_vhost/eventfd_link/eventfd_link.c create mode 100644 lib/librte_vhost/eventfd_link/eventfd_link.h create mode 100644 lib/librte_vhost/rte_virtio_net.h create mode 100644 lib/librte_vhost/vhost-net-cdev.c create mode 100644 lib/librte_vhost/vhost-net-cdev.h create mode 100644 lib/librte_vhost/vhost_rxtx.c create mode 100644 lib/librte_vhost/virtio-net.c -- 1.8.1.4
[dpdk-dev] [PATCH 2/2] lib/Makefile: Turn off vhost_lib by default as it needs fuse, fuse-devel to compile
Signed-off-by: Huawei Xie --- config/common_linuxapp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/config/common_linuxapp b/config/common_linuxapp index 002ed84..5b58278 100644 --- a/config/common_linuxapp +++ b/config/common_linuxapp @@ -391,8 +391,9 @@ CONFIG_RTE_KNI_VHOST_DEBUG_TX=n # # Compile vhost library +# fuse, fuse-devel, kernel-modules-extra packages are needed # -CONFIG_RTE_LIBRTE_VHOST=y +CONFIG_RTE_LIBRTE_VHOST=n CONFIG_RTE_LIBRTE_VHOST_DEBUG=n # -- 1.8.1.4
[dpdk-dev] [PATCH v2] user space vhost driver library
This user space vhost library is based off user space vhost example and aims to provide same API for different vhost implementations. This implementation includes user space vhost cuse driver, kernel module for eventfd proxy and vhost enqueue/dequeue functionalities. Huawei Xie (1): vhost library support to facilitate integration with vswitch. config/common_linuxapp |7 + lib/Makefile |1 + lib/librte_vhost/Makefile| 48 ++ lib/librte_vhost/eventfd_link/Makefile | 39 + lib/librte_vhost/eventfd_link/eventfd_link.c | 205 ++ lib/librte_vhost/eventfd_link/eventfd_link.h | 79 ++ lib/librte_vhost/rte_virtio_net.h| 192 + lib/librte_vhost/vhost-net-cdev.c| 363 ++ lib/librte_vhost/vhost-net-cdev.h| 112 +++ lib/librte_vhost/vhost_rxtx.c| 292 lib/librte_vhost/virtio-net.c| 1002 ++ 11 files changed, 2340 insertions(+) create mode 100644 lib/librte_vhost/Makefile create mode 100644 lib/librte_vhost/eventfd_link/Makefile create mode 100644 lib/librte_vhost/eventfd_link/eventfd_link.c create mode 100644 lib/librte_vhost/eventfd_link/eventfd_link.h create mode 100644 lib/librte_vhost/rte_virtio_net.h create mode 100644 lib/librte_vhost/vhost-net-cdev.c create mode 100644 lib/librte_vhost/vhost-net-cdev.h create mode 100644 lib/librte_vhost/vhost_rxtx.c create mode 100644 lib/librte_vhost/virtio-net.c -- 1.8.1.4
[dpdk-dev] [PATCH v2] lib/librte_vhost: vhost library support to facilitate integration with vswitch.
Signed-off-by: Huawei Xie --- config/common_linuxapp |7 + lib/Makefile |1 + lib/librte_vhost/Makefile| 48 ++ lib/librte_vhost/eventfd_link/Makefile | 39 + lib/librte_vhost/eventfd_link/eventfd_link.c | 205 ++ lib/librte_vhost/eventfd_link/eventfd_link.h | 79 ++ lib/librte_vhost/rte_virtio_net.h| 192 + lib/librte_vhost/vhost-net-cdev.c| 363 ++ lib/librte_vhost/vhost-net-cdev.h| 112 +++ lib/librte_vhost/vhost_rxtx.c| 292 lib/librte_vhost/virtio-net.c| 1002 ++ 11 files changed, 2340 insertions(+) create mode 100644 lib/librte_vhost/Makefile create mode 100644 lib/librte_vhost/eventfd_link/Makefile create mode 100644 lib/librte_vhost/eventfd_link/eventfd_link.c create mode 100644 lib/librte_vhost/eventfd_link/eventfd_link.h create mode 100644 lib/librte_vhost/rte_virtio_net.h create mode 100644 lib/librte_vhost/vhost-net-cdev.c create mode 100644 lib/librte_vhost/vhost-net-cdev.h create mode 100644 lib/librte_vhost/vhost_rxtx.c create mode 100644 lib/librte_vhost/virtio-net.c diff --git a/config/common_linuxapp b/config/common_linuxapp index 7bf5d80..5b58278 100644 --- a/config/common_linuxapp +++ b/config/common_linuxapp @@ -390,6 +390,13 @@ CONFIG_RTE_KNI_VHOST_DEBUG_RX=n CONFIG_RTE_KNI_VHOST_DEBUG_TX=n # +# Compile vhost library +# fuse, fuse-devel, kernel-modules-extra packages are needed +# +CONFIG_RTE_LIBRTE_VHOST=n +CONFIG_RTE_LIBRTE_VHOST_DEBUG=n + +# #Compile Xen domain0 support # CONFIG_RTE_LIBRTE_XEN_DOM0=n diff --git a/lib/Makefile b/lib/Makefile index 10c5bb3..007c174 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -60,6 +60,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_METER) += librte_meter DIRS-$(CONFIG_RTE_LIBRTE_SCHED) += librte_sched DIRS-$(CONFIG_RTE_LIBRTE_KVARGS) += librte_kvargs DIRS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += librte_distributor +DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += librte_vhost DIRS-$(CONFIG_RTE_LIBRTE_PORT) += librte_port DIRS-$(CONFIG_RTE_LIBRTE_TABLE) += librte_table DIRS-$(CONFIG_RTE_LIBRTE_PIPELINE) += librte_pipeline diff --git 
a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile new file mode 100644 index 000..f79778b --- /dev/null +++ b/lib/librte_vhost/Makefile @@ -0,0 +1,48 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_vhost.a + +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -D_FILE_OFFSET_BITS=64 -lfuse +LDFLAGS += -lfuse +# all source are stored in SRCS-y +SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost-net-cdev.c virtio-net.c vhost_rxtx.c + +# install includes +SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h + +# this lib needs eal +DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_eal lib/librte_mbuf + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_vhost/eventfd_link/Makefile b/lib/librte_vhost/eventfd_link/Makefile new file mode 100644 index 000..5fe7297 --- /dev/null +++ b/lib/librte_vhost/eventfd_link/Makefile @@ -0,0 +1,39 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modifi
[dpdk-dev] [PATCH v2] user space vhost driver library
Merged two patches in previous patch set into 1. > -Original Message- > From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Huawei Xie > Sent: Friday, July 18, 2014 5:56 PM > To: dev at dpdk.org > Subject: [dpdk-dev] [PATCH v2] user space vhost driver library > > This user space vhost library is based off user space vhost example and aims > to > provide same API for different vhost implementations. This implementation > includes user space vhost cuse driver, kernel module for eventfd proxy and > vhost > enqueue/dequeue functionalities. > > Huawei Xie (1): > vhost library support to facilitate integration with vswitch. > > config/common_linuxapp |7 + > lib/Makefile |1 + > lib/librte_vhost/Makefile| 48 ++ > lib/librte_vhost/eventfd_link/Makefile | 39 + > lib/librte_vhost/eventfd_link/eventfd_link.c | 205 ++ > lib/librte_vhost/eventfd_link/eventfd_link.h | 79 ++ > lib/librte_vhost/rte_virtio_net.h| 192 + > lib/librte_vhost/vhost-net-cdev.c| 363 ++ > lib/librte_vhost/vhost-net-cdev.h| 112 +++ > lib/librte_vhost/vhost_rxtx.c| 292 > lib/librte_vhost/virtio-net.c| 1002 > ++ > 11 files changed, 2340 insertions(+) > create mode 100644 lib/librte_vhost/Makefile > create mode 100644 lib/librte_vhost/eventfd_link/Makefile > create mode 100644 lib/librte_vhost/eventfd_link/eventfd_link.c > create mode 100644 lib/librte_vhost/eventfd_link/eventfd_link.h > create mode 100644 lib/librte_vhost/rte_virtio_net.h > create mode 100644 lib/librte_vhost/vhost-net-cdev.c > create mode 100644 lib/librte_vhost/vhost-net-cdev.h > create mode 100644 lib/librte_vhost/vhost_rxtx.c > create mode 100644 lib/librte_vhost/virtio-net.c > > -- > 1.8.1.4
[dpdk-dev] [PATCH v2 05/10] Subject: igb_uio: msix cleanups
Hi Stephen, 2014-06-06 16:50, Stephen Hemminger: > Since only one MSI-X entry is ever defined, there is no need to > put it as an array in the driver private data structure. One msix_entry > can just be put on the stack and initialized there. When merging this patch, I realized it's not complete: an occurrence of the msix_entries array remains. See the relevant part of your patch and my proposal below to be merged in this patch. > @@ -67,8 +52,6 @@ > struct pci_dev *pdev; > spinlock_t lock; /* spinlock for accessing PCI config space or msix data > in multi tasks/isr */ enum igbuio_intr_mode mode; > - struct msix_entry \ > - msix_entries[IGBUIO_NUM_MSI_VECTORS]; /* pointer to the msix > vectors to > be allocated later */ }; > > static char *intr_mode; > @@ -526,17 +509,16 @@ > > /* check if it need to try msix first */ > if (igbuio_intr_mode_preferred == IGBUIO_MSIX_INTR_MODE) { > - int vector; > - > - for (vector = 0; vector < IGBUIO_NUM_MSI_VECTORS; vector ++) > - udev->msix_entries[vector].entry = vector; > + /* only one MSIX vector needed */ > + struct msix_entry msix_entry = { > + .entry = 0, > + }; > > - if (pci_enable_msix(udev->pdev, udev->msix_entries, > IGBUIO_NUM_MSI_VECTORS) == 0) { > + if (pci_enable_msix(udev->pdev, &msix_entry, 1) == 0) { > udev->mode = IGBUIO_MSIX_INTR_MODE; > - } > - else { > - pci_disable_msix(udev->pdev); > - pr_info("fail to enable pci msix, or not enough msix > entries\n"); > + } else { > + pr_err("failed to enable pci msix, or not enough msix > entries\n"); > + udev->mode = IGBUIO_LEGACY_INTR_MODE; > } > } > switch (udev->mode) { Proposed changes: - udev->info.irq needs to be set with msix_entry - udev->mode is already the legacy one by default if (pci_enable_msix(udev->pdev, &msix_entry, 1) == 0) { udev->mode = RTE_INTR_MODE_MSIX; - } else { + udev->info.irq = msix_entry.vector; + udev->info.irq_flags = 0; + } else pr_err("failed to enable pci msix, or not enough msix entries\n"); - udev->mode = IGBUIO_LEGACY_INTR_MODE; - 
} } - switch (udev->mode) { - case RTE_INTR_MODE_MSIX: - udev->info.irq_flags = 0; - udev->info.irq = udev->msix_entries[0].vector; - break; - case RTE_INTR_MODE_MSI: - break; - case RTE_INTR_MODE_LEGACY: + if (udev->mode == RTE_INTR_MODE_LEGACY) { udev->info.irq_flags = IRQF_SHARED; udev->info.irq = dev->irq; - break; - default: - break; } Please confirm it's ok for you. -- Thomas
[dpdk-dev] [PATCH v2 08/10] igb_uio: fix IRQ mode handling
Hi Stephen, I cannot merge this part because some lines were removed from the context. I think I just have to add them but I would like confirmation. See below. > @@ -512,36 +499,36 @@ > #endif > udev->info.priv = udev; > udev->pdev = dev; > - udev->mode = 0; /* set the default value for interrupt mode */ > - spin_lock_init(&udev->lock); > > - /* check if it need to try msix first */ > - if (igbuio_intr_mode_preferred == IGBUIO_MSIX_INTR_MODE) { > - /* only one MSIX vector needed */ > - struct msix_entry msix_entry = { > - .entry = 0, > - }; > - > - if (pci_enable_msix(udev->pdev, &msix_entry, 1) == 0) { > + switch (igbuio_intr_mode_preferred) { > + case IGBUIO_MSIX_INTR_MODE: > + /* Only 1 msi-x vector needed */ > + msix_entry.entry = 0; > + if (pci_enable_msix(dev, &msix_entry, 1) == 0) { > + dev_dbg(&dev->dev, "using MSI-X"); > + udev->info.irq = msix_entry.vector; > udev->mode = IGBUIO_MSIX_INTR_MODE; > - } else { > - pr_err("failed to enable pci msix, or not enough msix > entries\n"); > - udev->mode = IGBUIO_LEGACY_INTR_MODE; > + break; > } > - } > - switch (udev->mode) { > - case IGBUIO_MSIX_INTR_MODE: > - udev->info.irq_flags = 0; > - udev->info.irq = udev->msix_entries[0].vector; > - break; > + /* fall back to MSI */ > case IGBUIO_MSI_INTR_MODE: > - break; > + if (pci_enable_msi(dev) == 0) { > + dev_dbg(&dev->dev, "using MSI"); > + udev->info.irq = dev->irq; > + udev->mode = IGBUIO_MSI_INTR_MODE; > + break; > + } > + /* fall back to INTX */ > case IGBUIO_LEGACY_INTR_MODE: > - udev->info.irq_flags = IRQF_SHARED; > - udev->info.irq = dev->irq; > - break; > - default: > - break; > + if (pci_intx_mask_supported(dev)) { > + dev_dbg(&dev->dev, "using INTX"); > + udev->info.irq_flags = IRQF_SHARED; > + udev->mode = IGBUIO_LEGACY_INTR_MODE; > + } else { > + dev_err(&dev->dev, "PCI INTX mask not supported\n"); > + err = -EIO; > + goto fail_release_iomem; > + } > } There is a problem here. 
These 2 lines are missing: pci_set_drvdata(dev, udev); igbuio_pci_irqcontrol(&udev->info, 0); > err = sysfs_create_group(&dev->dev.kobj, &dev_attr_grp); -- Thomas
[dpdk-dev] [PATCH v2 08/10] igb_uio: fix IRQ mode handling
Hi Stephen, I have other (inlined) comments on this patch. > udev->info.version = "0.1"; > udev->info.handler = igbuio_pci_irqhandler; > udev->info.irqcontrol = igbuio_pci_irqcontrol; > + udev->info.irq = dev->irq; [...] > + /* fall back to MSI */ > case IGBUIO_MSI_INTR_MODE: > - break; > + if (pci_enable_msi(dev) == 0) { > + dev_dbg(&dev->dev, "using MSI"); > + udev->info.irq = dev->irq; I think we can remove this line: info.irq is already set to the right value. > + udev->mode = IGBUIO_MSI_INTR_MODE; > + break; > + } There is no default case in this switch statement. One is now required so that the switch covers every value of the enum, so I suggest adding these lines: + default: + dev_err(&dev->dev, "unknown interrupt mode\n"); + err = -EINVAL; + goto fail_release_iomem; -- Thomas
[dpdk-dev] Multiprocess /dev/uio mmap Bug/possible fix
All, If this was fixed in 1.7 and I missed it I apologize (but it looks from source to still be broken). I am using DPDK 1.6.0r2 (will be upgrading to 1.7.0 soon) on RHEL 6.4. I've converted the functions below to 1.7.0 names/locations since it looks to still be an issue there. tl;dr -- to get examples/symmetric_mp to work at all I had to manually force the address used to map PCI UIO devices in the primary process which I achieved by: In eal_memory.c :: get_virtual_area(...) -- Use directly then increment internal_config.base_virtaddr instead of incrementing/adding baseaddr_offset In eal_pci_uio.c in 1.7.0 :: pci_uio_map_resource -- In the primary process, use and increment internal_config.base_virtaddr as the first argument when calling pci_uio_map_resource (instead of NULL). Background/reason: Recently I was trying to get multiprocessing with each process reading from one queue working and ran into issues with the call to mmap the memory for /dev/uioX. Even the example 'symmetric_mp' application was failing. The issue is that there are two memory regions which are mmap'd for each /dev/uioX device and the second one, in the secondary process, is being mapped to an address other than the requested address. This causes the check for pci_map_resource(...) != uio_res->maps[i].addr to fail. I tracked the reason for not using the requested address to the mmap call to create the stack for a thread created during one of (I forget which - I think eal) eal/pci/pmd_init calls which is used to handle interrupts. With DPDK compiled as a single shared library on our system the memory address handed for the second memory region for each /dev/uioX in the primary process is right in the middle of the stack for that thread. My possible fix: use --base-virtaddr to populate the requested address for mmapping the memory for /dev/uioX. I am not sure if this is safe but if it is I can submit a patch.
My approach was in eal_memory.c to, rather than keep an 'offset' variable locally just increment the global base_virtaddr value each time through that loop and then use the resulting final value when doing the /dev/uioX mappings. Of course in 1.6.0r2 I also ran into the errno bug with parsing --base-virtaddr and strtoull but it looks like that was fixed in 1.7.0. Thoughts? Thanks! Stefan
[dpdk-dev] VMWare Performance - vmxnet3-usermap
All, I've been playing with DPDK recently on a variety of bare metal Linux installations and so far have seen wonderful improvements in performance on both our Westmere and Sandy Bridge based servers. However when I install ESXi 5.1 (not linked to a vSphere management system -- stand alone ESXi installation) on one of the Westmere systems and use vmxnet3-usermap with the standard VMWare vSwitch my performance drops way down. Does anyone have a sense of pps/bps I can realistically expect to see from vmxnet3-usermap without doing SR-IOV/passthrough? Raw CentOS and RHEL 6.4 we're seeing 14.88Mpps/10Gbps but going to ESXi running CentOS 6.4 we're seeing 500Kpps/4Gbps. Is that reasonable (obviously packet rate is with small packets and data rate is with larger packets)? Without going to SR-IOV is there anything that I can do to improve this performance in vmware? Also, I know SR-IOV breaks many of the HA/auto balancing features of VMware. Is the same true with vmxnet3-usermap or is that safe to use with VMs floating around a cluster willy-nilly? Thanks, Stefan
[dpdk-dev] Multiprocess /dev/uio mmap Bug/possible fix
> All, > > If this was fixed in 1.7 and I missed it I apologize (but it looks from > source to > still be broken). I am using DPDK 1.6.0r2 (will be upgrading to > 1.7.0 soon) on RHEL 6.4. I've converted the functions below to 1.7.0 > names/locations since it looks to still be an issue there. > > tl;dr -- to get examples/symmetric_mp to work at all I had to manually force > the address used to map PCI UIO devices in the primary process which I > achieved by: > In eal_memory.c :: get_virtual_area(...) -- Use directly then increment > internal_config.base_virtaddr instead of incrementing/adding > baseaddr_offset In eal_pci_uio.c in 1.7.0 :: pci_uio_map_resource -- In the > primary process, use and increment internal_config.base_virtaddr as the first > argument when calling pci_uio_map_resource (instead of NULL). > > > Background/reason: > Recently I was trying to get multiprocessing with each process reading from > one queue working and ran into issues with the call to mmap the memory for > /dev/uioX. Even the example 'symmetric_mp' application was failing. The > issue is that there are two memory regions which are mmap'd for each > /dev/uioX device and the second one, in the secondary process, is being > mapped to an address other than the requested address. This causes the > check for pci_map_resource(...) != uio_res->maps[i].addr to fail. I tracked > the reason for not using the requested address to the mmap call to create > the stack for a thread created during one of (I forget which - I think eal) > eal/pci/pmd_init calls which is used to handle interrupts. > > With DPDK compiled as a single shared library on our system the memory > address handed for the second memory reach for each /dev/uioX in the > primary process is right in the middle of the stack for that thread. > > My possible fix: use --base-virtaddr to populate the requested address for > mmaping the memory for /dev/uioX I am not sure if this is safe but if it is I > can submit a patch. 
My approach was in eal_memory.c to, rather than keep > an 'offset' variable locally just increment the global base_virtaddr value > each > time through that loop and then use the resulting final value when doing the > /dev/uioX mappings. > > Of course in 1.6.0r2 I also ran into the errno bug with parsing > --base-virtaddr > and strtoull but it looks like that was fixed in 1.7.0. We also see this. There is a patch in a patchset that resolves this http://dpdk.org/ml/archives/dev/2014-June/003689.html It hasn't been pushed yet. > > > Thoughts? Thanks! > Stefan
[dpdk-dev] [PATCH 00/10] igb_uio related patches
Update patches so all are now bisectable, and incorporate comments. Also fix the checkpatch warnings that are fixable.
[dpdk-dev] [PATCH 01/10] igb_uio: use kernel standard log message
Use Linux kernel standard coding conventions for console messages. Bare use of printk() is not desirable and is reported as a style problem by checkpatch. Instead use pr_info() and dev_info() to print out log messages where appropriate. Signed-off-by: Stephen Hemminger --- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c 2014-06-19 14:29:03.435405487 -0700 +++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c 2014-06-19 14:59:18.0 -0700 @@ -22,6 +22,8 @@ * Intel Corporation */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -394,7 +396,7 @@ done: pci_unlock(pdev); spin_unlock: spin_unlock_irqrestore(&udev->lock, flags); - printk(KERN_INFO "irq 0x%x %s\n", irq, (ret == IRQ_HANDLED) ? "handled" : "not handled"); + pr_info("irq 0x%x %s\n", irq, (ret == IRQ_HANDLED) ? "handled" : "not handled"); return ret; } @@ -557,7 +559,7 @@ igbuio_pci_probe(struct pci_dev *dev, co * memory */ if (pci_enable_device(dev)) { - printk(KERN_ERR "Cannot enable PCI device\n"); + dev_err(&dev->dev, "Cannot enable PCI device\n"); goto fail_free; } @@ -566,7 +568,7 @@ igbuio_pci_probe(struct pci_dev *dev, co * module */ if (pci_request_regions(dev, "igb_uio")) { - printk(KERN_ERR "Cannot request regions\n"); + dev_err(&dev->dev, "Cannot request regions\n"); goto fail_disable; } @@ -579,10 +581,10 @@ igbuio_pci_probe(struct pci_dev *dev, co /* set 64-bit DMA mask */ if (pci_set_dma_mask(dev, DMA_BIT_MASK(64))) { - printk(KERN_ERR "Cannot set DMA mask\n"); + dev_err(&dev->dev, "Cannot set DMA mask\n"); goto fail_release_iomem; } else if (pci_set_consistent_dma_mask(dev, DMA_BIT_MASK(64))) { - printk(KERN_ERR "Cannot set consistent DMA mask\n"); + dev_err(&dev->dev, "Cannot set consistent DMA mask\n"); goto fail_release_iomem; } @@ -613,7 +615,7 @@ igbuio_pci_probe(struct pci_dev *dev, co } else { pci_disable_msix(udev->pdev); - printk(KERN_INFO "fail to enable pci msix, or not enough msix entries\n"); + pr_info("fail to enable pci msix, or not enough msix entries\n"); } } 
switch (udev->mode) { @@ -665,7 +667,7 @@ igbuio_pci_remove(struct pci_dev *dev) struct uio_info *info = pci_get_drvdata(dev); if (info->priv == NULL) { - printk(KERN_DEBUG "Not igbuio device\n"); + pr_notice("Not igbuio device\n"); return; } @@ -685,18 +687,18 @@ static int igbuio_config_intr_mode(char *intr_str) { if (!intr_str) { - printk(KERN_INFO "Use MSIX interrupt by default\n"); + pr_info("Use MSIX interrupt by default\n"); return 0; } if (!strcmp(intr_str, RTE_INTR_MODE_MSIX_NAME)) { igbuio_intr_mode_preferred = RTE_INTR_MODE_MSIX; - printk(KERN_INFO "Use MSIX interrupt\n"); + pr_info("Use MSIX interrupt\n"); } else if (!strcmp(intr_str, RTE_INTR_MODE_LEGACY_NAME)) { igbuio_intr_mode_preferred = RTE_INTR_MODE_LEGACY; - printk(KERN_INFO "Use legacy interrupt\n"); + pr_info("Use legacy interrupt\n"); } else { - printk(KERN_INFO "Error: bad parameter - %s\n", intr_str); + pr_info("Error: bad parameter - %s\n", intr_str); return -EINVAL; }
[dpdk-dev] [PATCH 02/10] igb_uio: use standard uio naming
Don't put capitalization and space in name since it will show up in /proc/interrupts. Instead use driver name to follow the conventions used in the kernel by other drivers. Signed-off-by: Stephen Hemminger --- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c 2014-06-19 14:29:53.775667934 -0700 +++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c 2014-06-19 14:59:16.0 -0700 @@ -589,7 +589,7 @@ igbuio_pci_probe(struct pci_dev *dev, co } /* fill uio infos */ - udev->info.name = "Intel IGB UIO"; + udev->info.name = "igb_uio"; udev->info.version = "0.1"; udev->info.handler = igbuio_pci_irqhandler; udev->info.irqcontrol = igbuio_pci_irqcontrol;
[dpdk-dev] [PATCH 03/10] igb_uio: dont wrap pci_num_vf function needlessly
It is better style to just use the pci_num_vf directly, rather than wrapping it with a local (but globally named) function with the same effect. Signed-off-by: Stephen Hemminger --- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c 2014-06-19 14:29:57.587687790 -0700 +++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c 2014-06-19 14:59:14.0 -0700 @@ -83,9 +83,8 @@ igbuio_get_uio_pci_dev(struct uio_info * } /* sriov sysfs */ -int local_pci_num_vf(struct pci_dev *dev) -{ #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,34) +static int pci_num_vf(struct pci_dev *dev) struct iov { int pos; int nres; @@ -100,17 +99,15 @@ int local_pci_num_vf(struct pci_dev *dev return 0; return iov->nr_virtfn; -#else - return pci_num_vf(dev); -#endif } +#endif static ssize_t show_max_vfs(struct device *dev, struct device_attribute *attr, char *buf) { - return snprintf(buf, 10, "%u\n", local_pci_num_vf( - container_of(dev, struct pci_dev, dev))); + return snprintf(buf, 10, "%u\n", + pci_num_vf(container_of(dev, struct pci_dev, dev))); } static ssize_t @@ -126,7 +123,7 @@ store_max_vfs(struct device *dev, struct if (0 == max_vfs) pci_disable_sriov(pdev); - else if (0 == local_pci_num_vf(pdev)) + else if (0 == pci_num_vf(pdev)) err = pci_enable_sriov(pdev, max_vfs); else /* do nothing if change max_vfs number */ err = -EINVAL;
[dpdk-dev] [PATCH 05/10] igb_uio: propagate error numbers in probe code
It is good practice to propagate the return values of failing functions so that more information can be reported. The failed result of probe will make it out to errno and get printed by modprobe and will aid in diagnosis of failures. Signed-off-by: Stephen Hemminger --- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c 2014-07-18 08:42:22.856545033 -0700 +++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c 2014-07-18 08:42:47.500631562 -0700 @@ -518,7 +518,7 @@ igbuio_setup_bars(struct pci_dev *dev, s } } - return ((iom != 0) ? ret : ENOENT); + return (iom != 0) ? ret : -ENOENT; } #if LINUX_VERSION_CODE < KERNEL_VERSION(3,8,0) @@ -530,6 +530,7 @@ igbuio_pci_probe(struct pci_dev *dev, co { struct rte_uio_pci_dev *udev; struct msix_entry msix_entry; + int err; udev = kzalloc(sizeof(struct rte_uio_pci_dev), GFP_KERNEL); if (!udev) @@ -539,7 +540,8 @@ igbuio_pci_probe(struct pci_dev *dev, co * enable device: ask low-level code to enable I/O and * memory */ - if (pci_enable_device(dev)) { + err = pci_enable_device(dev); + if (err != 0) { dev_err(&dev->dev, "Cannot enable PCI device\n"); goto fail_free; } @@ -548,7 +550,8 @@ igbuio_pci_probe(struct pci_dev *dev, co * reserve device's PCI memory regions for use by this * module */ - if (pci_request_regions(dev, "igb_uio")) { + err = pci_request_regions(dev, "igb_uio"); + if (err != 0) { dev_err(&dev->dev, "Cannot request regions\n"); goto fail_disable; } @@ -557,14 +560,19 @@ igbuio_pci_probe(struct pci_dev *dev, co pci_set_master(dev); /* remap IO memory */ - if (igbuio_setup_bars(dev, &udev->info)) + err = igbuio_setup_bars(dev, &udev->info); + if (err != 0) goto fail_release_iomem; /* set 64-bit DMA mask */ - if (pci_set_dma_mask(dev, DMA_BIT_MASK(64))) { + err = pci_set_dma_mask(dev, DMA_BIT_MASK(64)); + if (err != 0) { dev_err(&dev->dev, "Cannot set DMA mask\n"); goto fail_release_iomem; - } else if (pci_set_consistent_dma_mask(dev, DMA_BIT_MASK(64))) { + } + + err = pci_set_consistent_dma_mask(dev, DMA_BIT_MASK(64)); + if (err
!= 0) { dev_err(&dev->dev, "Cannot set consistent DMA mask\n"); goto fail_release_iomem; } @@ -613,19 +621,22 @@ igbuio_pci_probe(struct pci_dev *dev, co pci_set_drvdata(dev, udev); igbuio_pci_irqcontrol(&udev->info, 0); - if (sysfs_create_group(&dev->dev.kobj, &dev_attr_grp)) + err = sysfs_create_group(&dev->dev.kobj, &dev_attr_grp); + if (err != 0) goto fail_release_iomem; /* register uio driver */ - if (uio_register_device(&dev->dev, &udev->info)) - goto fail_release_iomem; + err = uio_register_device(&dev->dev, &udev->info); + if (err != 0) + goto fail_remove_group; printk(KERN_INFO "uio device registered with irq %lx\n", udev->info.irq); return 0; -fail_release_iomem: +fail_remove_group: sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp); +fail_release_iomem: igbuio_pci_release_iomem(&udev->info); if (udev->mode == RTE_INTR_MODE_MSIX) pci_disable_msix(udev->pdev); @@ -635,7 +646,7 @@ fail_disable: fail_free: kfree(udev); - return -ENODEV; + return err; } static void
[dpdk-dev] [PATCH 06/10] igb_uio: make irq mode param read-only
The module parameter is read-only since changing mode after loading isn't going to work. Signed-off-by: Stephen Hemminger --- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c 2014-07-18 08:42:54.028654483 -0700 +++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c 2014-07-18 08:42:54.028654483 -0700 @@ -721,7 +721,7 @@ igbuio_pci_exit_module(void) module_init(igbuio_pci_init_module); module_exit(igbuio_pci_exit_module); -module_param(intr_mode, charp, S_IRUGO | S_IWUSR); +module_param(intr_mode, charp, S_IRUGO); MODULE_PARM_DESC(intr_mode, "igb_uio interrupt mode (default=msix):\n" "" RTE_INTR_MODE_MSIX_NAME " Use MSIX interrupt\n"
[dpdk-dev] [PATCH 07/10] igb_uio: fix IRQ mode handling
This patch reworks how IRQ mode handling is done. The biggest code change is to use the standard INTX management code that exists in more recent kernels (and provide backport version). This also fixes the pci_lock code which was broken, since it was not protecting against config access, and was doing trylock. Make this driver behave like other Linux drivers. Start at MSI-X and degrade to less desirable modes automatically if the desired type is not available. This patch also makes MSI mode work, previously the mode was there but it would never work. Signed-off-by: Stephen Hemminger --- lib/librte_eal/common/include/rte_pci_dev_feature_defs.h |3 lib/librte_eal/linuxapp/igb_uio/igb_uio.c| 243 +++ 2 files changed, 119 insertions(+), 127 deletions(-) --- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c 2014-07-18 08:43:13.252721981 -0700 +++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c 2014-07-18 08:47:06.813542050 -0700 @@ -52,7 +52,6 @@ struct rte_uio_pci_dev { struct uio_info info; struct pci_dev *pdev; - spinlock_t lock; /* spinlock for accessing PCI config space or msix data in multi tasks/isr */ enum rte_intr_mode mode; }; @@ -220,36 +219,67 @@ static const struct attribute_group dev_ .attrs = dev_attrs, }; -static inline int -pci_lock(struct pci_dev * pdev) -{ - /* Some function names changes between 3.2.0 and 3.3.0... */ + #if LINUX_VERSION_CODE < KERNEL_VERSION(3,3,0) - pci_block_user_cfg_access(pdev); - return 1; -#else - return pci_cfg_access_trylock(pdev); -#endif +/* Check if INTX works to control irq's.
+ * Set's INTX_DISABLE flag and reads it back + */ +static bool pci_intx_mask_supported(struct pci_dev *dev) +{ + bool mask_supported = false; + uint16_t orig, new + + pci_block_user_cfg_access(dev); + pci_read_config_word(pdev, PCI_COMMAND, &orig); + pci_write_config_word(dev, PCI_COMMAND, + orig ^ PCI_COMMAND_INTX_DISABLE); + pci_read_config_word(dev, PCI_COMMAND, &new); + + if ((new ^ orig) & ~PCI_COMMAND_INTX_DISABLE) { + dev_err(&dev->dev, "Command register changed from " + "0x%x to 0x%x: driver or hardware bug?\n", orig, new); + } else if ((new ^ orig) & PCI_COMMAND_INTX_DISABLE) { + mask_supported = true; + pci_write_config_word(dev, PCI_COMMAND, orig); + } + pci_unblock_user_cfg_access(dev); } -static inline void -pci_unlock(struct pci_dev * pdev) +static bool pci_check_and_mask_intx(struct pci_dev *pdev) { - /* Some function names changes between 3.2.0 and 3.3.0... */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(3,3,0) - pci_unblock_user_cfg_access(pdev); -#else - pci_cfg_access_unlock(pdev); -#endif + bool pending; + uint32_t status; + + pci_block_user_cfg_access(dev); + pci_read_config_dword(pdev, PCI_COMMAND, &status); + + /* interrupt is not ours, goes to out */ + pending = (((status >> 16) & PCI_STATUS_INTERRUPT) != 0); + if (pending) { + uint16_t old, new; + + old = status; + if (state != 0) + new = old & (~PCI_COMMAND_INTX_DISABLE); + else + new = old | PCI_COMMAND_INTX_DISABLE; + + if (old != new) + pci_write_config_word(pdev, PCI_COMMAND, new); + } + pci_unblock_user_cfg_access(dev); + + return pending; } +#endif -/** +/* * It masks the msix on/off of generating MSI-X messages. 
*/ -static int +static void igbuio_msix_mask_irq(struct msi_desc *desc, int32_t state) { - uint32_t mask_bits = desc->masked; + u32 mask_bits = desc->masked; unsigned offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL; @@ -263,48 +293,25 @@ igbuio_msix_mask_irq(struct msi_desc *de readl(desc->mask_base); desc->masked = mask_bits; } - - return 0; } -/** - * This function sets/clears the masks for generating LSC interrupts. - * - * @param info - * The pointer to struct uio_info. - * @param on - * The on/off flag of masking LSC. - * @return - * -On success, zero value. - * -On failure, a negative value. - */ -static int -igbuio_set_interrupt_mask(struct rte_uio_pci_dev *udev, int32_t state) -{ - struct pci_dev *pdev = udev->pdev; - - if (udev->mode == RTE_INTR_MODE_MSIX) { - struct msi_desc *desc; - list_for_each_entry(desc, &pdev->msi_list, list) { - igbuio_msix_mask_irq(desc, state); - } - } else if (udev->mode == RTE_INTR_MODE_LEGACY) { - uint32_t status; - uint16_t old, new; +static void +igbuio_msi_mask_irq(struct irq_data *data, u32 enable) +{ + struct msi_de
[dpdk-dev] [PATCH 08/10] igb_uio: add missing locking to config access
Access to PCI config space should be inside pci_cfg_access_lock to avoid read/modify/write races. Signed-off-by: Stephen Hemminger --- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c 2014-06-19 14:55:03.959554076 -0700 +++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c 2014-06-19 14:58:55.0 -0700 @@ -148,10 +148,13 @@ store_extended_tag(struct device *dev, else return -EINVAL; + pci_cfg_access_lock(pci_dev); pci_bus_read_config_dword(pci_dev->bus, pci_dev->devfn, PCI_DEV_CAP_REG, &val); - if (!(val & PCI_DEV_CAP_EXT_TAG_MASK)) /* Not supported */ + if (!(val & PCI_DEV_CAP_EXT_TAG_MASK)) { /* Not supported */ + pci_cfg_access_unlock(pci_dev); return -EPERM; + } val = 0; pci_bus_read_config_dword(pci_dev->bus, pci_dev->devfn, @@ -162,6 +165,7 @@ store_extended_tag(struct device *dev, val &= ~PCI_DEV_CTRL_EXT_TAG_MASK; pci_bus_write_config_dword(pci_dev->bus, pci_dev->devfn, PCI_DEV_CTRL_REG, val); + pci_cfg_access_unlock(pci_dev); return count; }
[dpdk-dev] [PATCH 04/10] igb_uio: msix cleanups
Since only one MSI-X entry is ever defined, there is no need to put it as an array in the driver private data structure. One msix_entry can just be put on the stack and initialized there. Also remove the unused backport defines related to MSI-X. I suspect this code was just inherited from some other project and never cleaned up. Signed-off-by: Stephen Hemminger --- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c 2014-07-18 08:38:29.291724950 -0700 +++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c 2014-07-18 08:42:06.460487465 -0700 @@ -37,19 +37,6 @@ #endif #include -/** - * MSI-X related macros, copy from linux/pci_regs.h in kernel 2.6.39, - * but none of them in kernel 2.6.35. - */ -#ifndef PCI_MSIX_ENTRY_SIZE -#define PCI_MSIX_ENTRY_SIZE 16 -#define PCI_MSIX_ENTRY_LOWER_ADDR 0 -#define PCI_MSIX_ENTRY_UPPER_ADDR 4 -#define PCI_MSIX_ENTRY_DATA 8 -#define PCI_MSIX_ENTRY_VECTOR_CTRL 12 -#define PCI_MSIX_ENTRY_CTRL_MASKBIT 1 -#endif - #ifdef RTE_PCI_CONFIG #define PCI_SYS_FILE_BUF_SIZE 10 #define PCI_DEV_CAP_REG0xA4 @@ -59,8 +46,6 @@ #define PCI_DEV_CTRL_EXT_TAG_MASK (1 << PCI_DEV_CTRL_EXT_TAG_SHIFT) #endif -#define IGBUIO_NUM_MSI_VECTORS 1 - /** * A structure describing the private information for a uio device. 
*/ @@ -69,8 +54,6 @@ struct rte_uio_pci_dev { struct pci_dev *pdev; spinlock_t lock; /* spinlock for accessing PCI config space or msix data in multi tasks/isr */ enum rte_intr_mode mode; - struct msix_entry \ - msix_entries[IGBUIO_NUM_MSI_VECTORS]; /* pointer to the msix vectors to be allocated later */ }; static char *intr_mode = NULL; @@ -546,6 +529,7 @@ static int igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) { struct rte_uio_pci_dev *udev; + struct msix_entry msix_entry; udev = kzalloc(sizeof(struct rte_uio_pci_dev), GFP_KERNEL); if (!udev) @@ -602,12 +586,8 @@ igbuio_pci_probe(struct pci_dev *dev, co /* check if it need to try msix first */ if (igbuio_intr_mode_preferred == RTE_INTR_MODE_MSIX) { - int vector; - - for (vector = 0; vector < IGBUIO_NUM_MSI_VECTORS; vector ++) - udev->msix_entries[vector].entry = vector; - - if (pci_enable_msix(udev->pdev, udev->msix_entries, IGBUIO_NUM_MSI_VECTORS) == 0) { + msix_entry.entry = 0; + if (pci_enable_msix(dev, &msix_entry, 1) == 0) { udev->mode = RTE_INTR_MODE_MSIX; } else { @@ -618,7 +598,7 @@ igbuio_pci_probe(struct pci_dev *dev, co switch (udev->mode) { case RTE_INTR_MODE_MSIX: udev->info.irq_flags = 0; - udev->info.irq = udev->msix_entries[0].vector; + udev->info.irq = msix_entry.vector; break; case RTE_INTR_MODE_MSI: break;
[dpdk-dev] [PATCH 09/10] igb_uio: allow msi mode
Allows msi to be selected as a preferred mode. Signed-off-by: Stephen Hemminger --- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c 2014-07-18 08:47:24.713604900 -0700 +++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c 2014-07-18 08:47:24.709604886 -0700 @@ -646,7 +646,7 @@ fail_release_iomem: igbuio_pci_release_iomem(&udev->info); if (udev->mode == RTE_INTR_MODE_MSIX) pci_disable_msix(udev->pdev); - if (udev->mode == RTE_INTR_MODE_MSI) + else if (udev->mode == RTE_INTR_MODE_MSI) pci_disable_msi(udev->pdev); pci_release_regions(dev); fail_disable: @@ -661,6 +661,7 @@ static void igbuio_pci_remove(struct pci_dev *dev) { struct uio_info *info = pci_get_drvdata(dev); + struct rte_uio_pci_dev *udev = igbuio_get_uio_pci_dev(info); if (info->priv == NULL) { pr_notice("Not igbuio device\n"); @@ -670,9 +671,10 @@ igbuio_pci_remove(struct pci_dev *dev) sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp); uio_unregister_device(info); igbuio_pci_release_iomem(info); - if (((struct rte_uio_pci_dev *)info->priv)->mode == - RTE_INTR_MODE_MSIX) + if (udev->mode == RTE_INTR_MODE_MSIX) pci_disable_msix(dev); + else if (udev->mode == RTE_INTR_MODE_MSI) + pci_disable_msi(dev); pci_release_regions(dev); pci_disable_device(dev); pci_set_drvdata(dev, NULL); @@ -690,6 +692,9 @@ igbuio_config_intr_mode(char *intr_str) if (!strcmp(intr_str, RTE_INTR_MODE_MSIX_NAME)) { igbuio_intr_mode_preferred = RTE_INTR_MODE_MSIX; pr_info("Use MSIX interrupt\n"); + } else if (!strcmp(intr_str, RTE_INTR_MODE_MSI_NAME)) { + igbuio_intr_mode_preferred = RTE_INTR_MODE_MSI; + pr_info("Use MSI interrupt\n"); } else if (!strcmp(intr_str, RTE_INTR_MODE_LEGACY_NAME)) { igbuio_intr_mode_preferred = RTE_INTR_MODE_LEGACY; pr_info("Use legacy interrupt\n"); @@ -733,6 +738,7 @@ module_param(intr_mode, charp, S_IRUGO); MODULE_PARM_DESC(intr_mode, "igb_uio interrupt mode (default=msix):\n" "" RTE_INTR_MODE_MSIX_NAME " Use MSIX interrupt\n" +"" RTE_INTR_MODE_MSI_NAME " Use MSI interrupt\n" "" RTE_INTR_MODE_LEGACY_NAME " Use 
Legacy interrupt\n" "\n");
[dpdk-dev] [PATCH 10/10] igb_uio: fix check patch warnings
Fix whitespace and other problems reported by checkpatch. This did find a real bug in that the setup code was returning positive value for errors which goes against convention and might have caused a problem. Signed-off-by: Stephen Hemminger --- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c 2014-07-18 08:47:24.709604886 -0700 +++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c 2014-07-18 09:01:42.336616148 -0700 @@ -65,7 +65,7 @@ igbuio_get_uio_pci_dev(struct uio_info * } /* sriov sysfs */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,34) +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34) static int pci_num_vf(struct pci_dev *dev) struct iov { int pos; @@ -75,7 +75,7 @@ static int pci_num_vf(struct pci_dev *de u16 total; u16 initial; u16 nr_virtfn; - } *iov = (struct iov*)dev->sriov; + } *iov = (struct iov *)dev->sriov; if (!dev->is_physfn) return 0; @@ -204,9 +204,9 @@ store_max_read_request_size(struct devic static DEVICE_ATTR(max_vfs, S_IRUGO | S_IWUSR, show_max_vfs, store_max_vfs); #ifdef RTE_PCI_CONFIG -static DEVICE_ATTR(extended_tag, S_IRUGO | S_IWUSR, show_extended_tag, \ +static DEVICE_ATTR(extended_tag, S_IRUGO | S_IWUSR, show_extended_tag, store_extended_tag); -static DEVICE_ATTR(max_read_request_size, S_IRUGO | S_IWUSR, \ +static DEVICE_ATTR(max_read_request_size, S_IRUGO | S_IWUSR, show_max_read_request_size, store_max_read_request_size); #endif @@ -216,7 +216,7 @@ static struct attribute *dev_attrs[] = { &dev_attr_extended_tag.attr, &dev_attr_max_read_request_size.attr, #endif -NULL, + NULL, }; static const struct attribute_group dev_attr_grp = { @@ -224,7 +224,7 @@ static const struct attribute_group dev_ }; -#if LINUX_VERSION_CODE < KERNEL_VERSION(3,3,0) +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0) /* Check if INTX works to control irq's.
* Set's INTX_DISABLE flag and reads it back */ @@ -378,6 +378,7 @@ static int igbuio_dom0_mmap_phys(struct uio_info *info, struct vm_area_struct *vma) { int idx; + idx = (int)vma->vm_pgoff; vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); vma->vm_page_prot.pgprot |= _PAGE_IOMAP; @@ -400,8 +401,9 @@ igbuio_dom0_pci_mmap(struct uio_info *in if (vma->vm_pgoff >= MAX_UIO_MAPS) return -EINVAL; - if(info->mem[vma->vm_pgoff].size == 0) - return -EINVAL; + + if (info->mem[vma->vm_pgoff].size == 0) + return -EINVAL; idx = (int)vma->vm_pgoff; switch (info->mem[idx].memtype) { @@ -423,8 +425,8 @@ igbuio_pci_setup_iomem(struct pci_dev *d unsigned long addr, len; void *internal_addr; - if (sizeof(info->mem) / sizeof (info->mem[0]) <= n) - return (EINVAL); + if (sizeof(info->mem) / sizeof(info->mem[0]) <= n) + return -EINVAL; addr = pci_resource_start(dev, pci_bar); len = pci_resource_len(dev, pci_bar); @@ -448,20 +450,20 @@ igbuio_pci_setup_ioport(struct pci_dev * { unsigned long addr, len; - if (sizeof(info->port) / sizeof (info->port[0]) <= n) - return (EINVAL); + if (sizeof(info->port) / sizeof(info->port[0]) <= n) + return -EINVAL; addr = pci_resource_start(dev, pci_bar); len = pci_resource_len(dev, pci_bar); if (addr == 0 || len == 0) - return (-1); + return -EINVAL; info->port[n].name = name; info->port[n].start = addr; info->port[n].size = len; info->port[n].porttype = UIO_PORT_X86; - return (0); + return 0; } /* Unmap previously ioremap'd resources */ @@ -469,6 +471,7 @@ static void igbuio_pci_release_iomem(struct uio_info *info) { int i; + for (i = 0; i < MAX_UIO_MAPS; i++) { if (info->mem[i].internal_addr) iounmap(info->mem[i].internal_addr); @@ -497,14 +500,16 @@ igbuio_setup_bars(struct pci_dev *dev, s pci_resource_start(dev, i) != 0) { flags = pci_resource_flags(dev, i); if (flags & IORESOURCE_MEM) { - if ((ret = igbuio_pci_setup_iomem(dev, info, - iom, i, bar_names[i])) != 0) - return (ret); + ret = igbuio_pci_setup_iomem(dev, info, iom, +i, bar_names[i]); 
+ if (ret != 0) + return ret; iom++; } else if (flags & IORESOURCE_IO) { - if ((ret = igbuio_pci_setup_ioport(dev, info, - iop, i, bar_names[i])) !
[dpdk-dev] KNI interface
On Sat, Jul 19, 2014 at 12:08:35AM +, Harish Patil wrote: > Hello dpdk-dev, > Went thru' couple of documentation but not very clear to me. Does using > KNI means handing over all the packets received/transmitted by the poll > mode driver to the linux stack or can it be controlled for the control > packets only ? What is the KNI use-case (besides ethtool) and what are the > best options/choices/strategy to use poll mode driver for data path and > linux for control plane operations ? > Hi Harish, the packets to be sent to the kernel through the KNI interface are entirely under application control, so it can be limited to control path packets only. Indeed this is the primary expected use case for this module, as the KNI cannot deal with the kind of data rates that the PMDs can work with. Regards, /Bruce