>-----Original Message-----
>From: dev [mailto:dev-boun...@openvswitch.org] On Behalf Of Ciara Loftus
>Sent: Tuesday, May 24, 2016 2:15 PM
>To: dev@openvswitch.org
>Subject: [ovs-dev] [PATCH] netdev-dpdk: NUMA Aware vHost User
>
>This commit allows for vHost User memory from QEMU, DPDK and OVS, as
>well as the servicing PMD, to all come from the same socket.
>
>The socket id of a vhost-user port used to be set to that of the master lcore.
>Now it is possible to update the socket id if it is detected (during VM boot)
>that the vhost device memory is not on this node. If this is the case, a new
>mempool is created from the new node, and the PMD thread currently
>servicing the port will no longer, in favour of a thread from the new node (if
>enabled in the pmd-cpu-mask).
>
>To avail of this functionality, one must enable the
>CONFIG_RTE_LIBRTE_VHOST_NUMA DPDK configuration option.
>
>Signed-off-by: Ciara Loftus <ciara.lof...@intel.com>
>---
> .travis.yml                     |  3 +++
> INSTALL.DPDK.md                 |  8 ++++++--
> NEWS                            |  3 +++
> acinclude.m4                    |  2 +-
> lib/netdev-dpdk.c               | 37 ++++++++++++++++++++++++++++++++++---
> rhel/openvswitch-fedora.spec.in |  1 +
> 6 files changed, 48 insertions(+), 6 deletions(-)
>
>diff --git a/.travis.yml b/.travis.yml
>index ee2cf21..faba325 100644
>--- a/.travis.yml
>+++ b/.travis.yml
>@@ -11,10 +11,13 @@ addons:
>     packages:
>       - bc
>       - gcc-multilib
>+      - libnuma1
>+      - libnuma-dev
>       - libssl-dev
>       - llvm-dev
>       - libjemalloc1
>       - libjemalloc-dev
>+      - numactl
>
> before_install: ./.travis/${TRAVIS_OS_NAME}-prepare.sh
>
>diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md index 93f92e4..bbe0234
>100644
>--- a/INSTALL.DPDK.md
>+++ b/INSTALL.DPDK.md
>@@ -16,7 +16,7 @@ OVS needs a system with 1GB hugepages support.
> Building and Installing:
> ------------------------
>
>-Required: DPDK 16.04
>+Required: DPDK 16.04, libnuma

The change above makes libnuma mandatory for building OVS with the DPDK
datapath. However, the config option CONFIG_RTE_LIBRTE_VHOST_NUMA is disabled
by default in DPDK 16.04, so the steps to enable this option and rebuild DPDK
should be documented in the "Configure build & Install DPDK" section of the
install guide.

> Optional (if building with vhost-cuse): `fuse`, `fuse-devel` (`libfuse-dev`  
> on
>Debian/Ubuntu)
>
>@@ -443,7 +443,11 @@ Performance Tuning:
>
>       It is good practice to ensure that threads that are in the datapath are
>       pinned to cores in the same NUMA area. e.g. pmd threads and QEMU
>vCPUs
>-      responsible for forwarding.
>+      responsible for forwarding. If DPDK is built with
>+      CONFIG_RTE_LIBRTE_VHOST_NUMA=y, vHost User ports
>automatically
>+      detect the NUMA socket of the QEMU vCPUs and will be serviced by a
>PMD
>+      from the same node provided a core on this node is enabled in the
>+      pmd-cpu-mask.
>
>   9. Rx Mergeable buffers
>
>diff --git a/NEWS b/NEWS
>index 4e81cad..24ca39f 100644
>--- a/NEWS
>+++ b/NEWS
>@@ -32,6 +32,9 @@ Post-v2.5.0
>      * DB entries have been added for many of the DPDK EAL command line
>        arguments. Additional arguments can be passed via the dpdk-extra
>        entry.
>+     * PMD threads servicing vHost User ports can now come from the NUMA
>+       node that device memory is located on if
>CONFIG_RTE_LIBRTE_VHOST_NUMA
>+       is enabled in DPDK.
>    - ovs-benchmark: This utility has been removed due to lack of use and
>      bitrot.
>    - ovs-appctl:
>diff --git a/acinclude.m4 b/acinclude.m4 index f3de855..99ddf04 100644
>--- a/acinclude.m4
>+++ b/acinclude.m4
>@@ -218,7 +218,7 @@ AC_DEFUN([OVS_CHECK_DPDK], [
>     DPDKLIB_FOUND=false
>     save_LIBS=$LIBS
>     for extras in "" "-ldl"; do
>-        LIBS="$DPDK_LIB $extras $save_LIBS $DPDK_EXTRA_LIB"
>+        LIBS="$DPDK_LIB $extras $save_LIBS $DPDK_EXTRA_LIB -lnuma"

The above change makes libnuma mandatory for configuring OVS with the DPDK
datapath, even though 'CONFIG_RTE_LIBRTE_VHOST_NUMA' is disabled by default.
IMHO, could we check whether LIBRTE_VHOST_NUMA is enabled (from rte_config.h)
and append "-lnuma" only when it is? This would be in line with how we handle
the vhost-cuse case.

>         AC_LINK_IFELSE(
>            [AC_LANG_PROGRAM([#include <rte_config.h>
>                              #include <rte_eal.h>], diff --git 
> a/lib/netdev-dpdk.c
>b/lib/netdev-dpdk.c index 0d1b8c9..ad6c4bb 100644
>--- a/lib/netdev-dpdk.c
>+++ b/lib/netdev-dpdk.c
>@@ -30,6 +30,7 @@
> #include <sys/types.h>
> #include <sys/stat.h>
> #include <getopt.h>
>+#include <numaif.h>
>
> #include "dirs.h"
> #include "dp-packet.h"
>@@ -378,6 +379,9 @@ struct netdev_dpdk {
>      * netdev_dpdk*_reconfigure() is called */
>     int requested_n_txq;
>     int requested_n_rxq;
>+
>+    /* Socket ID detected when vHost device is brought up */
>+    int requested_socket_id;
> };
>
> struct netdev_rxq_dpdk {
>@@ -747,6 +751,7 @@ netdev_dpdk_init(struct netdev *netdev, unsigned int
>port_no,
>     }
>
>     dev->socket_id = sid < 0 ? SOCKET0 : sid;
>+    dev->requested_socket_id = dev->socket_id;
>     dev->port_id = port_no;
>     dev->type = type;
>     dev->flags = 0;
>@@ -2149,6 +2154,8 @@ new_device(struct virtio_net *virtio_dev)  {
>     struct netdev_dpdk *dev;
>     bool exists = false;
>+    int newnode = 0;
>+    long err = 0;
>
>     ovs_mutex_lock(&dpdk_mutex);
>     /* Add device to the vhost port with the same name as that passed down.
>*/ @@ -2162,6 +2169,19 @@ new_device(struct virtio_net *virtio_dev)
>             }
>             ovsrcu_set(&dev->virtio_dev, virtio_dev);
>             exists = true;
>+
>+            /* Get NUMA information */
>+            err = get_mempolicy(&newnode, NULL, 0, virtio_dev,
>+                                MPOL_F_NODE | MPOL_F_ADDR);
>+            if (err) {
>+                VLOG_INFO("Error getting NUMA info for vHost Device '%s'",
>+                        virtio_dev->ifname);
>+                newnode = dev->socket_id;
>+            } else if (newnode != dev->socket_id) {
>+                dev->requested_socket_id = newnode;
>+                netdev_request_reconfigure(&dev->up);
>+            }
>+
>             virtio_dev->flags |= VIRTIO_DEV_RUNNING;
>             /* Disable notifications. */
>             set_irq_status(virtio_dev); @@ -2178,8 +2198,8 @@ 
> new_device(struct
>virtio_net *virtio_dev)
>         return -1;
>     }
>
>-    VLOG_INFO("vHost Device '%s' %"PRIu64" has been added", virtio_dev-
>>ifname,
>-              virtio_dev->device_fh);
>+    VLOG_INFO("vHost Device '%s' %"PRIu64" has been added on socket %i",
>+              virtio_dev->ifname, virtio_dev->device_fh, newnode);
>     return 0;
> }
>
>@@ -2760,6 +2780,7 @@ static int
> netdev_dpdk_vhost_user_reconfigure(struct netdev *netdev)  {
>     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>+    int err = 0;
>
>     ovs_mutex_lock(&dpdk_mutex);
>     ovs_mutex_lock(&dev->mutex);
>@@ -2767,10 +2788,20 @@ netdev_dpdk_vhost_user_reconfigure(struct
>netdev *netdev)
>     netdev->n_txq = dev->requested_n_txq;
>     netdev->n_rxq = dev->requested_n_rxq;
>
>+    if (dev->requested_socket_id != dev->socket_id) {
>+        dev->socket_id = dev->requested_socket_id;
>+        /* Change mempool to new NUMA Node */
>+        dpdk_mp_put(dev->dpdk_mp);
>+        dev->dpdk_mp = dpdk_mp_get(dev->socket_id, dev->mtu);
>+        if (!dev->dpdk_mp) {
>+            err = ENOMEM;
>+        }
>+    }
>+
>     ovs_mutex_unlock(&dev->mutex);
>     ovs_mutex_unlock(&dpdk_mutex);
>
>-    return 0;
>+    return err;
> }
>
> static int
>diff --git a/rhel/openvswitch-fedora.spec.in b/rhel/openvswitch-
>fedora.spec.in index 0759096..e360d4d 100644
>--- a/rhel/openvswitch-fedora.spec.in
>+++ b/rhel/openvswitch-fedora.spec.in
>@@ -54,6 +54,7 @@ BuildRequires: libcap-ng libcap-ng-devel  %endif  %if
>%{with dpdk}
> BuildRequires: dpdk-devel >= 2.2.0
>+BuildRequires: numactl numactl-devel numactl-libs
> Provides: %{name}-dpdk = %{version}-%{release}  %endif
>
>--
>2.4.3
>
>_______________________________________________
>dev mailing list
>dev@openvswitch.org
>http://openvswitch.org/mailman/listinfo/dev
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to