Thanks for v2, applied to master.

2016-06-13 3:10 GMT-07:00 Ciara Loftus <ciara.lof...@intel.com>:
> This commit allows for vHost User memory from QEMU, DPDK and OVS, as > well as the servicing PMD, to all come from the same socket. > > The socket id of a vhost-user port used to be set to that of the master > lcore. Now it is possible to update the socket id if it is detected > (during VM boot) that the vhost device memory is not on this node. If > this is the case, a new mempool is created from the new node, and the > PMD thread currently servicing the port will no longer, in favour of a > thread from the new node (if enabled in the pmd-cpu-mask). > > To avail of this functionality, one must enable the > CONFIG_RTE_LIBRTE_VHOST_NUMA DPDK configuration option. > > Signed-off-by: Ciara Loftus <ciara.lof...@intel.com> > --- > > v2: > - Remove numactl dependencies from travis & fedora spec files > - Updated log message > > .travis.yml | 1 + > INSTALL.DPDK.md | 8 ++++++-- > NEWS | 3 +++ > acinclude.m4 | 2 +- > lib/netdev-dpdk.c | 37 ++++++++++++++++++++++++++++++++++--- > rhel/openvswitch-fedora.spec.in | 2 ++ > 6 files changed, 47 insertions(+), 6 deletions(-) > > diff --git a/.travis.yml b/.travis.yml > index ee2cf21..6c818cb 100644 > --- a/.travis.yml > +++ b/.travis.yml > @@ -11,6 +11,7 @@ addons: > packages: > - bc > - gcc-multilib > + - libnuma-dev > - libssl-dev > - llvm-dev > - libjemalloc1 > diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md > index c2e32bf..00e75bd 100644 > --- a/INSTALL.DPDK.md > +++ b/INSTALL.DPDK.md > @@ -16,7 +16,7 @@ OVS needs a system with 1GB hugepages support. > Building and Installing: > ------------------------ > > -Required: DPDK 16.04 > +Required: DPDK 16.04, libnuma > Optional (if building with vhost-cuse): `fuse`, `fuse-devel` > (`libfuse-dev` > on Debian/Ubuntu) > > @@ -465,7 +465,11 @@ Performance Tuning: > > It is good practice to ensure that threads that are in the datapath are > pinned to cores in the same NUMA area. e.g. pmd threads and QEMU vCPUs > - responsible for forwarding. > + responsible for forwarding. 
If DPDK is built with > + CONFIG_RTE_LIBRTE_VHOST_NUMA=y, vHost User ports automatically > + detect the NUMA socket of the QEMU vCPUs and will be serviced by a PMD > + from the same node provided a core on this node is enabled in the > + pmd-cpu-mask. > > 9. Rx Mergeable buffers > > diff --git a/NEWS b/NEWS > index ba201cf..fe24449 100644 > --- a/NEWS > +++ b/NEWS > @@ -33,6 +33,9 @@ Post-v2.5.0 > arguments. Additional arguments can be passed via the dpdk-extra > entry. > * Add ingress policing functionality. > + * PMD threads servicing vHost User ports can now come from the NUMA > + node that device memory is located on if > CONFIG_RTE_LIBRTE_VHOST_NUMA > + is enabled in DPDK. > - ovs-benchmark: This utility has been removed due to lack of use and > bitrot. > - ovs-appctl: > diff --git a/acinclude.m4 b/acinclude.m4 > index 0a14856..3978980 100644 > --- a/acinclude.m4 > +++ b/acinclude.m4 > @@ -219,7 +219,7 @@ AC_DEFUN([OVS_CHECK_DPDK], [ > DPDKLIB_FOUND=false > save_LIBS=$LIBS > for extras in "" "-ldl"; do > - LIBS="$DPDK_LIB $extras $save_LIBS $DPDK_EXTRA_LIB" > + LIBS="$DPDK_LIB $extras $save_LIBS $DPDK_EXTRA_LIB -lnuma" > AC_LINK_IFELSE( > [AC_LANG_PROGRAM([#include <rte_config.h> > #include <rte_eal.h>], > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index 19d355f..fc683e9 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -30,6 +30,7 @@ > #include <sys/types.h> > #include <sys/stat.h> > #include <getopt.h> > +#include <numaif.h> > > #include "dirs.h" > #include "dp-packet.h" > @@ -385,6 +386,9 @@ struct netdev_dpdk { > int requested_n_txq; > int requested_n_rxq; > > + /* Socket ID detected when vHost device is brought up */ > + int requested_socket_id; > + > /* Ingress Policer */ > OVSRCU_TYPE(struct ingress_policer *) ingress_policer; > uint32_t policer_rate; > @@ -761,6 +765,7 @@ netdev_dpdk_init(struct netdev *netdev, unsigned int > port_no, > } > > dev->socket_id = sid < 0 ? 
SOCKET0 : sid; > + dev->requested_socket_id = dev->socket_id; > dev->port_id = port_no; > dev->type = type; > dev->flags = 0; > @@ -2344,6 +2349,8 @@ new_device(struct virtio_net *virtio_dev) > { > struct netdev_dpdk *dev; > bool exists = false; > + int newnode = 0; > + long err = 0; > > ovs_mutex_lock(&dpdk_mutex); > /* Add device to the vhost port with the same name as that passed > down. */ > @@ -2357,6 +2364,19 @@ new_device(struct virtio_net *virtio_dev) > } > ovsrcu_set(&dev->virtio_dev, virtio_dev); > exists = true; > + > + /* Get NUMA information */ > + err = get_mempolicy(&newnode, NULL, 0, virtio_dev, > + MPOL_F_NODE | MPOL_F_ADDR); > + if (err) { > + VLOG_INFO("Error getting NUMA info for vHost Device '%s'", > + virtio_dev->ifname); > + newnode = dev->socket_id; > + } else if (newnode != dev->socket_id) { > + dev->requested_socket_id = newnode; > + netdev_request_reconfigure(&dev->up); > + } > + > virtio_dev->flags |= VIRTIO_DEV_RUNNING; > /* Disable notifications. */ > set_irq_status(virtio_dev); > @@ -2374,8 +2394,8 @@ new_device(struct virtio_net *virtio_dev) > return -1; > } > > - VLOG_INFO("vHost Device '%s' %"PRIu64" has been added", > virtio_dev->ifname, > - virtio_dev->device_fh); > + VLOG_INFO("vHost Device '%s' %"PRIu64" has been added on numa node > %i", > + virtio_dev->ifname, virtio_dev->device_fh, newnode); > return 0; > } > > @@ -2937,6 +2957,7 @@ static int > netdev_dpdk_vhost_user_reconfigure(struct netdev *netdev) > { > struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); > + int err = 0; > > ovs_mutex_lock(&dpdk_mutex); > ovs_mutex_lock(&dev->mutex); > @@ -2944,10 +2965,20 @@ netdev_dpdk_vhost_user_reconfigure(struct netdev > *netdev) > netdev->n_txq = dev->requested_n_txq; > netdev->n_rxq = dev->requested_n_rxq; > > + if (dev->requested_socket_id != dev->socket_id) { > + dev->socket_id = dev->requested_socket_id; > + /* Change mempool to new NUMA Node */ > + dpdk_mp_put(dev->dpdk_mp); > + dev->dpdk_mp = dpdk_mp_get(dev->socket_id, 
dev->mtu); > + if (!dev->dpdk_mp) { > + err = ENOMEM; > + } > + } > + > ovs_mutex_unlock(&dev->mutex); > ovs_mutex_unlock(&dpdk_mutex); > > - return 0; > + return err; > } > > static int > diff --git a/rhel/openvswitch-fedora.spec.in b/rhel/ > openvswitch-fedora.spec.in > index 0759096..959c90a 100644 > --- a/rhel/openvswitch-fedora.spec.in > +++ b/rhel/openvswitch-fedora.spec.in > @@ -54,6 +54,8 @@ BuildRequires: libcap-ng libcap-ng-devel > %endif > %if %{with dpdk} > BuildRequires: dpdk-devel >= 2.2.0 > +BuildRequires: numactl-devel > +Requires: numactl-libs > Provides: %{name}-dpdk = %{version}-%{release} > %endif > > -- > 2.4.3 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev > _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev