On 12.07.2016 12:11, Ciara Loftus wrote: > This commit introduces support for DPDK 16.07 and consequently breaks > compatibility with DPDK 16.04. > > DPDK 16.07 introduces some changes to various APIs. These have been > updated in OVS, including: > * xstats API: changes to structure of xstats > * vhost API: replace virtio-net references with 'vid' > > Signed-off-by: Ciara Loftus <ciara.lof...@intel.com> > --- > .travis/linux-build.sh | 2 +- > INSTALL.DPDK-ADVANCED.md | 8 +- > INSTALL.DPDK.md | 20 ++-- > lib/netdev-dpdk.c | 243 > +++++++++++++++++++++++------------------------ > 4 files changed, 135 insertions(+), 138 deletions(-) > > diff --git a/.travis/linux-build.sh b/.travis/linux-build.sh > index 065de39..1b3d43d 100755 > --- a/.travis/linux-build.sh > +++ b/.travis/linux-build.sh > @@ -68,7 +68,7 @@ fi > > if [ "$DPDK" ]; then > if [ -z "$DPDK_VER" ]; then > - DPDK_VER="16.04" > + DPDK_VER="16.07" > fi > install_dpdk $DPDK_VER > if [ "$CC" = "clang" ]; then > diff --git a/INSTALL.DPDK-ADVANCED.md b/INSTALL.DPDK-ADVANCED.md > index 9ae536d..ec1de29 100644 > --- a/INSTALL.DPDK-ADVANCED.md > +++ b/INSTALL.DPDK-ADVANCED.md > @@ -43,7 +43,7 @@ for DPDK and OVS. > For IVSHMEM case, set `export DPDK_TARGET=x86_64-ivshmem-linuxapp-gcc` > > ``` > - export DPDK_DIR=/usr/src/dpdk-16.04 > + export DPDK_DIR=/usr/src/dpdk-16.07 > export DPDK_BUILD=$DPDK_DIR/$DPDK_TARGET > make install T=$DPDK_TARGET DESTDIR=install > ``` > @@ -339,7 +339,7 @@ For users wanting to do packet forwarding using kernel > stack below are the steps > cd /usr/src/cmdline_generator > wget > https://raw.githubusercontent.com/netgroup-polito/un-orchestrator/master/orchestrator/compute_controller/plugins/kvm-libvirt/cmdline_generator/cmdline_generator.c > wget > https://raw.githubusercontent.com/netgroup-polito/un-orchestrator/master/orchestrator/compute_controller/plugins/kvm-libvirt/cmdline_generator/Makefile > - export RTE_SDK=/usr/src/dpdk-16.04 > + export RTE_SDK=/usr/src/dpdk-16.07 > export RTE_TARGET=x86_64-ivshmem-linuxapp-gcc > make > ./build/cmdline_generator -m -p dpdkr0 XXX > @@ -363,7 +363,7 @@ For users wanting to do packet forwarding using kernel > stack below are the steps > mount -t hugetlbfs nodev /dev/hugepages (if not already mounted) > > # Build the DPDK ring application in the VM > - export RTE_SDK=/root/dpdk-16.04 > + export RTE_SDK=/root/dpdk-16.07 > export RTE_TARGET=x86_64-ivshmem-linuxapp-gcc > make > > @@ -374,7 +374,7 @@ For users wanting to do packet forwarding using kernel > stack below are the steps > > ## <a name="vhost"></a> 6. Vhost Walkthrough > > -DPDK 16.04 supports two types of vhost: > +DPDK 16.07 supports two types of vhost: > > 1. vhost-user - enabled default > > diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md > index 5407794..9022ad8 100644 > --- a/INSTALL.DPDK.md > +++ b/INSTALL.DPDK.md > @@ -21,7 +21,7 @@ The DPDK support of Open vSwitch is considered > 'experimental'. > > ### Prerequisites > > -* Required: DPDK 16.04, libnuma > +* Required: DPDK 16.07, libnuma > * Hardware: [DPDK Supported NICs] when physical ports in use > > ## <a name="build"></a> 2. Building and Installation > @@ -42,10 +42,10 @@ advanced install guide [INSTALL.DPDK-ADVANCED.md] > > ``` > cd /usr/src/ > - wget http://dpdk.org/browse/dpdk/snapshot/dpdk-16.04.zip > - unzip dpdk-16.04.zip > + wget http://dpdk.org/browse/dpdk/snapshot/dpdk-16.07.zip > + unzip dpdk-16.07.zip > > - export DPDK_DIR=/usr/src/dpdk-16.04 > + export DPDK_DIR=/usr/src/dpdk-16.07 > cd $DPDK_DIR > ``` > > @@ -329,9 +329,9 @@ can be found in [Vhost Walkthrough]. > > ``` > cd /root/dpdk/ > - wget http://dpdk.org/browse/dpdk/snapshot/dpdk-16.04.zip > - unzip dpdk-16.04.zip > - export DPDK_DIR=/root/dpdk/dpdk-16.04 > + wget http://dpdk.org/browse/dpdk/snapshot/dpdk-16.07.zip > + unzip dpdk-16.07.zip > + export DPDK_DIR=/root/dpdk/dpdk-16.07 > export DPDK_TARGET=x86_64-native-linuxapp-gcc > export DPDK_BUILD=$DPDK_DIR/$DPDK_TARGET > cd $DPDK_DIR > @@ -487,7 +487,7 @@ can be found in [Vhost Walkthrough]. > </disk> > <disk type='dir' device='disk'> > <driver name='qemu' type='fat'/> > - <source dir='/usr/src/dpdk-16.04'/> > + <source dir='/usr/src/dpdk-16.07'/> > <target dev='vdb' bus='virtio'/> > <readonly/> > </disk> > @@ -557,9 +557,9 @@ can be found in [Vhost Walkthrough]. > DPDK. It is recommended that users update Network Interface firmware to > match what has been validated for the DPDK release. > > - For DPDK 16.04, the list of validated firmware versions can be found at: > + For DPDK 16.07, the list of validated firmware versions can be found at: > > - http://dpdk.org/doc/guides/rel_notes/release_16_04.html > + http://dpdk.org/doc/guides/rel_notes/release_16.07.html > > > Bug Reporting: > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index 85b18fd..9cf0b0c 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -356,8 +356,8 @@ struct netdev_dpdk { > * always true. */ > bool txq_needs_locking; > > - /* virtio-net structure for vhost device */ > - OVSRCU_TYPE(struct virtio_net *) virtio_dev; > + /* virtio identifier for vhost devices */ > + int vid; > > /* Identifier used to distinguish vhost devices from each other */ > char vhost_id[PATH_MAX]; > @@ -393,8 +393,6 @@ static bool dpdk_thread_is_pmd(void); > > static int netdev_dpdk_construct(struct netdev *); > > -struct virtio_net * netdev_dpdk_get_virtio(const struct netdev_dpdk *dev); > - > struct ingress_policer * > netdev_dpdk_get_ingress_policer(const struct netdev_dpdk *dev); > > @@ -749,6 +747,7 @@ netdev_dpdk_init(struct netdev *netdev, unsigned int > port_no, > dev->flags = 0; > dev->mtu = ETHER_MTU; > dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu); > + dev->vid = -1; > > buf_size = dpdk_buf_size(dev->mtu); > dev->dpdk_mp = dpdk_mp_get(dev->socket_id, FRAME_LEN_TO_MTU(buf_size)); > @@ -846,6 +845,7 @@ netdev_dpdk_vhost_user_construct(struct netdev *netdev) > struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); > const char *name = netdev->name; > int err; > + uint64_t flags = 0; > > /* 'name' is appended to 'vhost_sock_dir' and used to create a socket in > * the file system. '/' or '\' would traverse directories, so they're not > @@ -868,7 +868,7 @@ netdev_dpdk_vhost_user_construct(struct netdev *netdev) > snprintf(dev->vhost_id, sizeof(dev->vhost_id), "%s/%s", > vhost_sock_dir, name); > > - err = rte_vhost_driver_register(dev->vhost_id); > + err = rte_vhost_driver_register(dev->vhost_id, flags); > if (err) { > VLOG_ERR("vhost-user socket device setup failure for socket %s\n", > dev->vhost_id); > @@ -923,13 +923,19 @@ netdev_dpdk_destruct(struct netdev *netdev) > ovs_mutex_unlock(&dpdk_mutex); > } > > +static bool > +is_vhost_running(int vid) > +{ > + return vid >= 0; > +} > + > static void > netdev_dpdk_vhost_destruct(struct netdev *netdev) > { > struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); > > /* Guest becomes an orphan if still attached. */ > - if (netdev_dpdk_get_virtio(dev) != NULL) { > + if (is_vhost_running(dev->vid)) { > VLOG_ERR("Removing port '%s' while vhost device still attached.", > netdev->name); > VLOG_ERR("To restore connectivity after re-adding of port, VM on > socket" > @@ -1143,12 +1149,6 @@ ingress_policer_run(struct ingress_policer *policer, > struct rte_mbuf **pkts, > return cnt; > } > > -static bool > -is_vhost_running(struct virtio_net *virtio_dev) > -{ > - return (virtio_dev != NULL && (virtio_dev->flags & VIRTIO_DEV_RUNNING)); > -} > - > static inline void > netdev_dpdk_vhost_update_rx_size_counters(struct netdev_stats *stats, > unsigned int packet_size) > @@ -1218,18 +1218,17 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq, > struct dp_packet **packets, int *c) > { > struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev); > - struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev); > int qid = rxq->queue_id; > struct ingress_policer *policer = netdev_dpdk_get_ingress_policer(dev); > uint16_t nb_rx = 0; > uint16_t dropped = 0; > > - if (OVS_UNLIKELY(!is_vhost_running(virtio_dev) > + if (OVS_UNLIKELY(!is_vhost_running(dev->vid) > || !(dev->flags & NETDEV_UP))) { > return EAGAIN; > } > > - nb_rx = rte_vhost_dequeue_burst(virtio_dev, qid * VIRTIO_QNUM + > VIRTIO_TXQ, > + nb_rx = rte_vhost_dequeue_burst(dev->vid, qid * VIRTIO_QNUM + VIRTIO_TXQ, > dev->dpdk_mp->mp, > (struct rte_mbuf **)packets, > NETDEV_MAX_BURST); > @@ -1326,7 +1325,6 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int qid, > bool may_steal) > { > struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); > - struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev); > struct rte_mbuf **cur_pkts = (struct rte_mbuf **) pkts; > unsigned int total_pkts = cnt; > unsigned int qos_pkts = cnt; > @@ -1334,7 +1332,7 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int qid, > > qid = dev->tx_q[qid % netdev->n_txq].map; > > - if (OVS_UNLIKELY(!is_vhost_running(virtio_dev) || qid < 0 > + if (OVS_UNLIKELY(!is_vhost_running(dev->vid) || qid < 0 > || !(dev->flags & NETDEV_UP))) { > rte_spinlock_lock(&dev->stats_lock); > dev->stats.tx_dropped+= cnt; > @@ -1352,7 +1350,7 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int qid, > int vhost_qid = qid * VIRTIO_QNUM + VIRTIO_RXQ; > unsigned int tx_pkts; > > - tx_pkts = rte_vhost_enqueue_burst(virtio_dev, vhost_qid, > + tx_pkts = rte_vhost_enqueue_burst(dev->vid, vhost_qid, > cur_pkts, cnt); > if (OVS_LIKELY(tx_pkts)) { > /* Packets have been sent.*/ > @@ -1707,60 +1705,50 @@ netdev_dpdk_vhost_get_stats(const struct netdev > *netdev, > > static void > netdev_dpdk_convert_xstats(struct netdev_stats *stats, > - const struct rte_eth_xstats *xstats, > + const struct rte_eth_xstat *xstats, > + const struct rte_eth_xstat_name *names, > const unsigned int size) > { > - /* XXX Current implementation is simple search through an array > - * to find hardcoded counter names. In future DPDK release (TBD) > - * XSTATS API will change so each counter will be represented by > - * unique ID instead of String. */ > - > for (unsigned int i = 0; i < size; i++) { > - if (strcmp(XSTAT_RX_64_PACKETS, xstats[i].name) == 0) { > + if (strcmp(XSTAT_RX_64_PACKETS, names[i].name) == 0) { > stats->rx_1_to_64_packets = xstats[i].value; > - } else if (strcmp(XSTAT_RX_65_TO_127_PACKETS, xstats[i].name) == 0) { > + } else if (strcmp(XSTAT_RX_65_TO_127_PACKETS, names[i].name) == 0) { > stats->rx_65_to_127_packets = xstats[i].value; > - } else if (strcmp(XSTAT_RX_128_TO_255_PACKETS, xstats[i].name) == 0) > { > + } else if (strcmp(XSTAT_RX_128_TO_255_PACKETS, names[i].name) == 0) { > stats->rx_128_to_255_packets = xstats[i].value; > - } else if (strcmp(XSTAT_RX_256_TO_511_PACKETS, xstats[i].name) == 0) > { > + } else if (strcmp(XSTAT_RX_256_TO_511_PACKETS, names[i].name) == 0) { > stats->rx_256_to_511_packets = xstats[i].value; > - } else if (strcmp(XSTAT_RX_512_TO_1023_PACKETS, > - xstats[i].name) == 0) { > + } else if (strcmp(XSTAT_RX_512_TO_1023_PACKETS, names[i].name) == 0) > { > stats->rx_512_to_1023_packets = xstats[i].value; > - } else if (strcmp(XSTAT_RX_1024_TO_1522_PACKETS, > - xstats[i].name) == 0) { > + } else if (strcmp(XSTAT_RX_1024_TO_1522_PACKETS, names[i].name) == > 0) { > stats->rx_1024_to_1522_packets = xstats[i].value; > - } else if (strcmp(XSTAT_RX_1523_TO_MAX_PACKETS, > - xstats[i].name) == 0) { > + } else if (strcmp(XSTAT_RX_1523_TO_MAX_PACKETS, names[i].name) == 0) > { > stats->rx_1523_to_max_packets = xstats[i].value; > - } else if (strcmp(XSTAT_TX_64_PACKETS, xstats[i].name) == 0) { > + } else if (strcmp(XSTAT_TX_64_PACKETS, names[i].name) == 0) { > stats->tx_1_to_64_packets = xstats[i].value; > - } else if (strcmp(XSTAT_TX_65_TO_127_PACKETS, xstats[i].name) == 0) { > + } else if (strcmp(XSTAT_TX_65_TO_127_PACKETS, names[i].name) == 0) { > stats->tx_65_to_127_packets = xstats[i].value; > - } else if (strcmp(XSTAT_TX_128_TO_255_PACKETS, xstats[i].name) == 0) > { > + } else if (strcmp(XSTAT_TX_128_TO_255_PACKETS, names[i].name) == 0) { > stats->tx_128_to_255_packets = xstats[i].value; > - } else if (strcmp(XSTAT_TX_256_TO_511_PACKETS, xstats[i].name) == 0) > { > + } else if (strcmp(XSTAT_TX_256_TO_511_PACKETS, names[i].name) == 0) { > stats->tx_256_to_511_packets = xstats[i].value; > - } else if (strcmp(XSTAT_TX_512_TO_1023_PACKETS, > - xstats[i].name) == 0) { > + } else if (strcmp(XSTAT_TX_512_TO_1023_PACKETS, names[i].name) == 0) > { > stats->tx_512_to_1023_packets = xstats[i].value; > - } else if (strcmp(XSTAT_TX_1024_TO_1522_PACKETS, > - xstats[i].name) == 0) { > + } else if (strcmp(XSTAT_TX_1024_TO_1522_PACKETS, names[i].name) == > 0) { > stats->tx_1024_to_1522_packets = xstats[i].value; > - } else if (strcmp(XSTAT_TX_1523_TO_MAX_PACKETS, > - xstats[i].name) == 0) { > + } else if (strcmp(XSTAT_TX_1523_TO_MAX_PACKETS, names[i].name) == 0) > { > stats->tx_1523_to_max_packets = xstats[i].value; > - } else if (strcmp(XSTAT_TX_MULTICAST_PACKETS, xstats[i].name) == 0) { > + } else if (strcmp(XSTAT_TX_MULTICAST_PACKETS, names[i].name) == 0) { > stats->tx_multicast_packets = xstats[i].value; > - } else if (strcmp(XSTAT_RX_BROADCAST_PACKETS, xstats[i].name) == 0) { > + } else if (strcmp(XSTAT_RX_BROADCAST_PACKETS, names[i].name) == 0) { > stats->rx_broadcast_packets = xstats[i].value; > - } else if (strcmp(XSTAT_TX_BROADCAST_PACKETS, xstats[i].name) == 0) { > + } else if (strcmp(XSTAT_TX_BROADCAST_PACKETS, names[i].name) == 0) { > stats->tx_broadcast_packets = xstats[i].value; > - } else if (strcmp(XSTAT_RX_UNDERSIZED_ERRORS, xstats[i].name) == 0) { > + } else if (strcmp(XSTAT_RX_UNDERSIZED_ERRORS, names[i].name) == 0) { > stats->rx_undersized_errors = xstats[i].value; > - } else if (strcmp(XSTAT_RX_FRAGMENTED_ERRORS, xstats[i].name) == 0) { > + } else if (strcmp(XSTAT_RX_FRAGMENTED_ERRORS, names[i].name) == 0) { > stats->rx_fragmented_errors = xstats[i].value; > - } else if (strcmp(XSTAT_RX_JABBER_ERRORS, xstats[i].name) == 0) { > + } else if (strcmp(XSTAT_RX_JABBER_ERRORS, names[i].name) == 0) { > stats->rx_jabber_errors = xstats[i].value; > } > } > @@ -1776,7 +1764,8 @@ netdev_dpdk_get_stats(const struct netdev *netdev, > struct netdev_stats *stats) > netdev_dpdk_get_carrier(netdev, &gg); > ovs_mutex_lock(&dev->mutex); > > - struct rte_eth_xstats *rte_xstats; > + struct rte_eth_xstat *rte_xstats; > + struct rte_eth_xstat_name *rte_xstats_names; > int rte_xstats_len, rte_xstats_ret; > > if (rte_eth_stats_get(dev->port_id, &rte_stats)) { > @@ -1785,20 +1774,51 @@ netdev_dpdk_get_stats(const struct netdev *netdev, > struct netdev_stats *stats) > return EPROTO; > } > > - rte_xstats_len = rte_eth_xstats_get(dev->port_id, NULL, 0); > - if (rte_xstats_len > 0) { > - rte_xstats = dpdk_rte_mzalloc(sizeof(*rte_xstats) * rte_xstats_len); > - memset(rte_xstats, 0xff, sizeof(*rte_xstats) * rte_xstats_len); > - rte_xstats_ret = rte_eth_xstats_get(dev->port_id, rte_xstats, > - rte_xstats_len); > - if (rte_xstats_ret > 0 && rte_xstats_ret <= rte_xstats_len) { > - netdev_dpdk_convert_xstats(stats, rte_xstats, rte_xstats_ret); > - } > + /* Get length of statistics */ > + rte_xstats_len = rte_eth_xstats_get_names(dev->port_id, NULL, 0); > + if (rte_xstats_len < 0) { > + VLOG_WARN("Cannot get XSTATS values for port: %i", dev->port_id); > + goto out; > + } > + /* Reserve memory for xstats names */ > + rte_xstats_names = dpdk_rte_mzalloc(sizeof(*rte_xstats_names) > + * rte_xstats_len); > + if (rte_xstats_names == NULL) { > + VLOG_WARN("Cannot allocate memory for XSTATS names for port %i", > + dev->port_id); > + rte_free(rte_xstats_names); > + goto out; > + } > + /* Reserve memory for xstats values */ > + rte_xstats = dpdk_rte_mzalloc(sizeof(*rte_xstats) * rte_xstats_len); > + if (rte_xstats == NULL) { > + VLOG_WARN("Cannot allocate memory for XSTATS values for port %i", > + dev->port_id); > rte_free(rte_xstats); > + goto out; > + } > + /* Retreive xstats names */ > + if (rte_xstats_len != rte_eth_xstats_get_names(dev->port_id, > + rte_xstats_names, rte_xstats_len)) { > + VLOG_WARN("Cannot get XSTATS names for port: %i.", dev->port_id); > + rte_free(rte_xstats); > + rte_free(rte_xstats_names); > + goto out; > + } > + /* Retreive xstats values */ > + memset(rte_xstats, 0xff, sizeof(*rte_xstats) * rte_xstats_len); > + rte_xstats_ret = rte_eth_xstats_get(dev->port_id, rte_xstats, > + rte_xstats_len); > + if (rte_xstats_ret > 0 && rte_xstats_ret <= rte_xstats_len) { > + netdev_dpdk_convert_xstats(stats, rte_xstats, rte_xstats_names, > + rte_xstats_len); > } else { > - VLOG_WARN("Can't get XSTATS counters for port: %i.", dev->port_id); > + VLOG_WARN("Cannot get XSTATS values for port: %i.", dev->port_id); > + rte_free(rte_xstats); > + rte_free(rte_xstats_names); > } > > +out: > stats->rx_packets = rte_stats.ipackets; > stats->tx_packets = rte_stats.opackets; > stats->rx_bytes = rte_stats.ibytes; > @@ -1806,7 +1826,6 @@ netdev_dpdk_get_stats(const struct netdev *netdev, > struct netdev_stats *stats) > /* DPDK counts imissed as errors, but count them here as dropped instead > */ > stats->rx_errors = rte_stats.ierrors - rte_stats.imissed; > stats->tx_errors = rte_stats.oerrors; > - stats->multicast = rte_stats.imcasts; > > rte_spinlock_lock(&dev->stats_lock); > stats->tx_dropped = dev->stats.tx_dropped; > @@ -1973,11 +1992,10 @@ static int > netdev_dpdk_vhost_get_carrier(const struct netdev *netdev, bool *carrier) > { > struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); > - struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev); > > ovs_mutex_lock(&dev->mutex); > > - if (is_vhost_running(virtio_dev)) { > + if (is_vhost_running(dev->vid)) { > *carrier = 1; > } else { > *carrier = 0; > @@ -2045,10 +2063,9 @@ netdev_dpdk_update_flags__(struct netdev_dpdk *dev, > /* If DPDK_DEV_VHOST device's NETDEV_UP flag was changed and vhost is > * running then change netdev's change_seq to trigger link state > * update. */ > - struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev); > > if ((NETDEV_UP & ((*old_flagsp ^ on) | (*old_flagsp ^ off))) > - && is_vhost_running(virtio_dev)) { > + && is_vhost_running(dev->vid)){ > netdev_change_seq_changed(&dev->up); > > /* Clear statistics if device is getting up. */ > @@ -2176,15 +2193,15 @@ netdev_dpdk_set_admin_state(struct unixctl_conn > *conn, int argc, > * Set virtqueue flags so that we do not receive interrupts. > */ > static void > -set_irq_status(struct virtio_net *virtio_dev) > +set_irq_status(int vid) > { > uint32_t i; > uint64_t idx; > > - for (i = 0; i < virtio_dev->virt_qp_nb; i++) { > + for (i = 0; i < rte_vhost_get_queue_num(vid); i++) { > idx = i * VIRTIO_QNUM; > - rte_vhost_enable_guest_notification(virtio_dev, idx + VIRTIO_RXQ, 0); > - rte_vhost_enable_guest_notification(virtio_dev, idx + VIRTIO_TXQ, 0); > + rte_vhost_enable_guest_notification(vid, idx + VIRTIO_RXQ, 0); > + rte_vhost_enable_guest_notification(vid, idx + VIRTIO_TXQ, 0); > } > } > > @@ -2233,26 +2250,27 @@ netdev_dpdk_remap_txqs(struct netdev_dpdk *dev) > * A new virtio-net device is added to a vhost port. > */ > static int > -new_device(struct virtio_net *virtio_dev) > +new_device(int vid) > { > struct netdev_dpdk *dev; > bool exists = false; > int newnode = 0; > - long err = 0; > + char ifname[PATH_MAX]; > + > + rte_vhost_get_ifname(vid, ifname, sizeof(ifname)); > > ovs_mutex_lock(&dpdk_mutex); > /* Add device to the vhost port with the same name as that passed down. > */ > LIST_FOR_EACH(dev, list_node, &dpdk_list) { > - if (strncmp(virtio_dev->ifname, dev->vhost_id, IF_NAME_SZ) == 0) { > - uint32_t qp_num = virtio_dev->virt_qp_nb; > + if (strncmp(ifname, dev->vhost_id, PATH_MAX) == 0) { > + uint32_t qp_num = rte_vhost_get_queue_num(vid); > > ovs_mutex_lock(&dev->mutex); > /* Get NUMA information */ > - err = get_mempolicy(&newnode, NULL, 0, virtio_dev, > - MPOL_F_NODE | MPOL_F_ADDR); > - if (err) { > - VLOG_INFO("Error getting NUMA info for vHost Device '%s'", > - virtio_dev->ifname); > + newnode = rte_vhost_get_numa_node(vid); > + if ((newnode != -1) && (newnode != dev->socket_id)) { > + dev->requested_socket_id = newnode; > + } else { > newnode = dev->socket_id; > }
'requested_socket_id' will be updated on the next line. How about just this: /* Get NUMA information */ newnode = rte_vhost_get_numa_node(vid); if (newnode == -1) { newnode = dev->socket_id; } > > @@ -2261,11 +2279,11 @@ new_device(struct virtio_net *virtio_dev) > dev->requested_n_txq = qp_num; > netdev_request_reconfigure(&dev->up); > > - ovsrcu_set(&dev->virtio_dev, virtio_dev); > + dev->vid = vid; > exists = true; > > /* Disable notifications. */ > - set_irq_status(virtio_dev); > + set_irq_status(dev->vid); > netdev_change_seq_changed(&dev->up); > ovs_mutex_unlock(&dev->mutex); > break; > @@ -2274,14 +2292,14 @@ new_device(struct virtio_net *virtio_dev) > ovs_mutex_unlock(&dpdk_mutex); > > if (!exists) { > - VLOG_INFO("vHost Device '%s' %"PRIu64" can't be added - name not " > - "found", virtio_dev->ifname, virtio_dev->device_fh); > + VLOG_INFO("vHost Device '%s' can't be added - name not found", > ifname); > > return -1; > } > > - VLOG_INFO("vHost Device '%s' %"PRIu64" has been added on numa node %i", > - virtio_dev->ifname, virtio_dev->device_fh, newnode); > + VLOG_INFO("vHost Device '%s' has been added on numa node %i", > + ifname, newnode); > + > return 0; > } > > @@ -2304,18 +2322,20 @@ netdev_dpdk_txq_map_clear(struct netdev_dpdk *dev) > * the device. > */ > static void > -destroy_device(volatile struct virtio_net *virtio_dev) > +destroy_device(int vid) > { > struct netdev_dpdk *dev; > bool exists = false; > + char ifname[PATH_MAX]; > + > + rte_vhost_get_ifname(vid, ifname, sizeof(ifname)); > > ovs_mutex_lock(&dpdk_mutex); > LIST_FOR_EACH (dev, list_node, &dpdk_list) { > - if (netdev_dpdk_get_virtio(dev) == virtio_dev) { > + if (dev->vid == vid) { > > ovs_mutex_lock(&dev->mutex); > - virtio_dev->flags &= ~VIRTIO_DEV_RUNNING; > - ovsrcu_set(&dev->virtio_dev, NULL); > + dev->vid = -1; > /* Clear tx/rx queue settings. */ > netdev_dpdk_txq_map_clear(dev); > dev->requested_n_rxq = NR_QUEUE; > @@ -2332,31 +2352,21 @@ destroy_device(volatile struct virtio_net *virtio_dev) > ovs_mutex_unlock(&dpdk_mutex); > > if (exists == true) { > - /* > - * Wait for other threads to quiesce after setting the 'virtio_dev' > - * to NULL, before returning. > - */ > - ovsrcu_synchronize(); > - /* > - * As call to ovsrcu_synchronize() will end the quiescent state, > - * put thread back into quiescent state before returning. > - */ > - ovsrcu_quiesce_start(); > - VLOG_INFO("vHost Device '%s' %"PRIu64" has been removed", > - virtio_dev->ifname, virtio_dev->device_fh); > + VLOG_INFO("vHost Device '%s' has been removed", ifname); 'ovsrcu_synchronize()' here was used to be sure that all PMD threads are stopped their rx/tx operations on this vhost device. Without it there will be race between freeing of vhost device inside DPDK and rx/tx operations on it. You should not remove this block of code. Comments, I guess, should be fixed somehow. > } else { > - VLOG_INFO("vHost Device '%s' %"PRIu64" not found", > virtio_dev->ifname, > - virtio_dev->device_fh); > + VLOG_INFO("vHost Device '%s' not found", ifname); > } > } > > static int > -vring_state_changed(struct virtio_net *virtio_dev, uint16_t queue_id, > - int enable) > +vring_state_changed(int vid, uint16_t queue_id, int enable) > { > struct netdev_dpdk *dev; > bool exists = false; > int qid = queue_id / VIRTIO_QNUM; > + char ifname[PATH_MAX]; > + > + rte_vhost_get_ifname(vid, ifname, sizeof(ifname)); > > if (queue_id % VIRTIO_QNUM == VIRTIO_TXQ) { > return 0; > @@ -2364,7 +2374,7 @@ vring_state_changed(struct virtio_net *virtio_dev, > uint16_t queue_id, > > ovs_mutex_lock(&dpdk_mutex); > LIST_FOR_EACH (dev, list_node, &dpdk_list) { > - if (strncmp(virtio_dev->ifname, dev->vhost_id, IF_NAME_SZ) == 0) { > + if (strncmp(ifname, dev->vhost_id, PATH_MAX) == 0) { > ovs_mutex_lock(&dev->mutex); > if (enable) { > dev->tx_q[qid].map = qid; > @@ -2380,25 +2390,17 @@ vring_state_changed(struct virtio_net *virtio_dev, > uint16_t queue_id, > ovs_mutex_unlock(&dpdk_mutex); > > if (exists) { > - VLOG_INFO("State of queue %d ( tx_qid %d ) of vhost device '%s' %" > - PRIu64" changed to \'%s\'", queue_id, qid, > - virtio_dev->ifname, virtio_dev->device_fh, > + VLOG_INFO("State of queue %d ( tx_qid %d ) of vhost device '%s'" > + "changed to \'%s\'", queue_id, qid, ifname, > (enable == 1) ? "enabled" : "disabled"); > } else { > - VLOG_INFO("vHost Device '%s' %"PRIu64" not found", > virtio_dev->ifname, > - virtio_dev->device_fh); > + VLOG_INFO("vHost Device '%s' not found", ifname); > return -1; > } > > return 0; > } > > -struct virtio_net * > -netdev_dpdk_get_virtio(const struct netdev_dpdk *dev) > -{ > - return ovsrcu_get(struct virtio_net *, &dev->virtio_dev); > -} > - > struct ingress_policer * > netdev_dpdk_get_ingress_policer(const struct netdev_dpdk *dev) > { > @@ -2848,7 +2850,6 @@ static int > netdev_dpdk_vhost_user_reconfigure(struct netdev *netdev) > { > struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); > - struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev); > int err = 0; > > ovs_mutex_lock(&dpdk_mutex); > @@ -2874,10 +2875,6 @@ netdev_dpdk_vhost_user_reconfigure(struct netdev > *netdev) > } > } > > - if (virtio_dev) { > - virtio_dev->flags |= VIRTIO_DEV_RUNNING; > - } > - This part of code was moved here from 'new_device()' to postpone using of this vhost device by PMD threads until first reconfiguration. (Possible issues: wrong numa node, only 1 queue enabled.) Maybe we can use another flag or 'vid' itself for the purpose of postponing? > ovs_mutex_unlock(&dev->mutex); > ovs_mutex_unlock(&dpdk_mutex); > > @@ -3342,7 +3339,7 @@ dpdk_init__(const struct smap *ovs_other_config) > /* Register CUSE device to handle IOCTLs. > * Unless otherwise specified, cuse_dev_name is set to vhost-net. > */ > - err = rte_vhost_driver_register(cuse_dev_name); > + err = rte_vhost_driver_register(cuse_dev_name, 0); > > if (err != 0) { > VLOG_ERR("CUSE device setup failure."); > _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev