>-----Original Message----- >From: dev [mailto:dev-boun...@openvswitch.org] On Behalf Of Ciara Loftus >Sent: Tuesday, May 24, 2016 2:15 PM >To: dev@openvswitch.org >Subject: [ovs-dev] [PATCH] netdev-dpdk: NUMA Aware vHost User > >This commit allows for vHost User memory from QEMU, DPDK and OVS, as >well as the servicing PMD, to all come from the same socket. > >The socket id of a vhost-user port used to be set to that of the master lcore. >Now it is possible to update the socket id if it is detected (during VM boot) >that the vhost device memory is not on this node. If this is the case, a new >mempool is created from the new node, and the PMD thread currently >servicing the port will no longer, in favour of a thread from the new node (if >enabled in the pmd-cpu-mask). > >To avail of this functionality, one must enable the >CONFIG_RTE_LIBRTE_VHOST_NUMA DPDK configuration option. > >Signed-off-by: Ciara Loftus <ciara.lof...@intel.com> >--- > .travis.yml | 3 +++ > INSTALL.DPDK.md | 8 ++++++-- > NEWS | 3 +++ > acinclude.m4 | 2 +- > lib/netdev-dpdk.c | 37 ++++++++++++++++++++++++++++++++++--- > rhel/openvswitch-fedora.spec.in | 1 + > 6 files changed, 48 insertions(+), 6 deletions(-) > >diff --git a/.travis.yml b/.travis.yml >index ee2cf21..faba325 100644 >--- a/.travis.yml >+++ b/.travis.yml >@@ -11,10 +11,13 @@ addons: > packages: > - bc > - gcc-multilib >+ - libnuma1 >+ - libnuma-dev > - libssl-dev > - llvm-dev > - libjemalloc1 > - libjemalloc-dev >+ - numactl > > before_install: ./.travis/${TRAVIS_OS_NAME}-prepare.sh > >diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md index 93f92e4..bbe0234 >100644 >--- a/INSTALL.DPDK.md >+++ b/INSTALL.DPDK.md >@@ -16,7 +16,7 @@ OVS needs a system with 1GB hugepages support. > Building and Installing: > ------------------------ > >-Required: DPDK 16.04 >+Required: DPDK 16.04, libnuma
The change above makes libnuma mandatory to build OVS with the DPDK datapath. The config option CONFIG_RTE_LIBRTE_VHOST_NUMA is disabled by default in DPDK-16.04, and hence the steps to enable this option and build DPDK may have to be captured in the "Configure build & Install DPDK" section of the install guide. > Optional (if building with vhost-cuse): `fuse`, `fuse-devel` (`libfuse-dev` > on >Debian/Ubuntu) > >@@ -443,7 +443,11 @@ Performance Tuning: > > It is good practice to ensure that threads that are in the datapath are > pinned to cores in the same NUMA area. e.g. pmd threads and QEMU >vCPUs >- responsible for forwarding. >+ responsible for forwarding. If DPDK is built with >+ CONFIG_RTE_LIBRTE_VHOST_NUMA=y, vHost User ports >automatically >+ detect the NUMA socket of the QEMU vCPUs and will be serviced by a >PMD >+ from the same node provided a core on this node is enabled in the >+ pmd-cpu-mask. > > 9. Rx Mergeable buffers > >diff --git a/NEWS b/NEWS >index 4e81cad..24ca39f 100644 >--- a/NEWS >+++ b/NEWS >@@ -32,6 +32,9 @@ Post-v2.5.0 > * DB entries have been added for many of the DPDK EAL command line > arguments. Additional arguments can be passed via the dpdk-extra > entry. >+ * PMD threads servicing vHost User ports can now come from the NUMA >+ node that device memory is located on if >CONFIG_RTE_LIBRTE_VHOST_NUMA >+ is enabled in DPDK. > - ovs-benchmark: This utility has been removed due to lack of use and > bitrot. > - ovs-appctl: >diff --git a/acinclude.m4 b/acinclude.m4 index f3de855..99ddf04 100644 >--- a/acinclude.m4 >+++ b/acinclude.m4 >@@ -218,7 +218,7 @@ AC_DEFUN([OVS_CHECK_DPDK], [ > DPDKLIB_FOUND=false > save_LIBS=$LIBS > for extras in "" "-ldl"; do >- LIBS="$DPDK_LIB $extras $save_LIBS $DPDK_EXTRA_LIB" >+ LIBS="$DPDK_LIB $extras $save_LIBS $DPDK_EXTRA_LIB -lnuma" The above change makes libnuma mandatory for configuring OVS using the DPDK datapath while 'CONFIG_RTE_LIBRTE_VHOST_NUMA' is disabled by default.
IMHO, can we check whether LIBRTE_VHOST_NUMA is enabled (from rte_config.h) and append "-lnuma" only when it is? This is in line with how we handle the vhost-cuse case. > AC_LINK_IFELSE( > [AC_LANG_PROGRAM([#include <rte_config.h> > #include <rte_eal.h>], diff --git > a/lib/netdev-dpdk.c >b/lib/netdev-dpdk.c index 0d1b8c9..ad6c4bb 100644 >--- a/lib/netdev-dpdk.c >+++ b/lib/netdev-dpdk.c >@@ -30,6 +30,7 @@ > #include <sys/types.h> > #include <sys/stat.h> > #include <getopt.h> >+#include <numaif.h> > > #include "dirs.h" > #include "dp-packet.h" >@@ -378,6 +379,9 @@ struct netdev_dpdk { > * netdev_dpdk*_reconfigure() is called */ > int requested_n_txq; > int requested_n_rxq; >+ >+ /* Socket ID detected when vHost device is brought up */ >+ int requested_socket_id; > }; > > struct netdev_rxq_dpdk { >@@ -747,6 +751,7 @@ netdev_dpdk_init(struct netdev *netdev, unsigned int >port_no, > } > > dev->socket_id = sid < 0 ? SOCKET0 : sid; >+ dev->requested_socket_id = dev->socket_id; > dev->port_id = port_no; > dev->type = type; > dev->flags = 0; >@@ -2149,6 +2154,8 @@ new_device(struct virtio_net *virtio_dev) { > struct netdev_dpdk *dev; > bool exists = false; >+ int newnode = 0; >+ long err = 0; > > ovs_mutex_lock(&dpdk_mutex); > /* Add device to the vhost port with the same name as that passed down. >*/ @@ -2162,6 +2169,19 @@ new_device(struct virtio_net *virtio_dev) > } > ovsrcu_set(&dev->virtio_dev, virtio_dev); > exists = true; >+ >+ /* Get NUMA information */ >+ err = get_mempolicy(&newnode, NULL, 0, virtio_dev, >+ MPOL_F_NODE | MPOL_F_ADDR); >+ if (err) { >+ VLOG_INFO("Error getting NUMA info for vHost Device '%s'", >+ virtio_dev->ifname); >+ newnode = dev->socket_id; >+ } else if (newnode != dev->socket_id) { >+ dev->requested_socket_id = newnode; >+ netdev_request_reconfigure(&dev->up); >+ } >+ > virtio_dev->flags |= VIRTIO_DEV_RUNNING; > /* Disable notifications. 
*/ > set_irq_status(virtio_dev); @@ -2178,8 +2198,8 @@ > new_device(struct >virtio_net *virtio_dev) > return -1; > } > >- VLOG_INFO("vHost Device '%s' %"PRIu64" has been added", virtio_dev- >>ifname, >- virtio_dev->device_fh); >+ VLOG_INFO("vHost Device '%s' %"PRIu64" has been added on socket %i", >+ virtio_dev->ifname, virtio_dev->device_fh, newnode); > return 0; > } > >@@ -2760,6 +2780,7 @@ static int > netdev_dpdk_vhost_user_reconfigure(struct netdev *netdev) { > struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); >+ int err = 0; > > ovs_mutex_lock(&dpdk_mutex); > ovs_mutex_lock(&dev->mutex); >@@ -2767,10 +2788,20 @@ netdev_dpdk_vhost_user_reconfigure(struct >netdev *netdev) > netdev->n_txq = dev->requested_n_txq; > netdev->n_rxq = dev->requested_n_rxq; > >+ if (dev->requested_socket_id != dev->socket_id) { >+ dev->socket_id = dev->requested_socket_id; >+ /* Change mempool to new NUMA Node */ >+ dpdk_mp_put(dev->dpdk_mp); >+ dev->dpdk_mp = dpdk_mp_get(dev->socket_id, dev->mtu); >+ if (!dev->dpdk_mp) { >+ err = ENOMEM; >+ } >+ } >+ > ovs_mutex_unlock(&dev->mutex); > ovs_mutex_unlock(&dpdk_mutex); > >- return 0; >+ return err; > } > > static int >diff --git a/rhel/openvswitch-fedora.spec.in b/rhel/openvswitch- >fedora.spec.in index 0759096..e360d4d 100644 >--- a/rhel/openvswitch-fedora.spec.in >+++ b/rhel/openvswitch-fedora.spec.in >@@ -54,6 +54,7 @@ BuildRequires: libcap-ng libcap-ng-devel %endif %if >%{with dpdk} > BuildRequires: dpdk-devel >= 2.2.0 >+BuildRequires: numactl numactl-devel numactl-libs > Provides: %{name}-dpdk = %{version}-%{release} %endif > >-- >2.4.3 > >_______________________________________________ >dev mailing list >dev@openvswitch.org >http://openvswitch.org/mailman/listinfo/dev _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev