Thanks for v2, applied to master.

2016-06-13 3:10 GMT-07:00 Ciara Loftus <ciara.lof...@intel.com>:

> This commit allows for vHost User memory from QEMU, DPDK and OVS, as
> well as the servicing PMD, to all come from the same socket.
>
> The socket id of a vhost-user port used to be set to that of the master
> lcore. Now it is possible to update the socket id if it is detected
> (during VM boot) that the vhost device memory is not on this node. If
> this is the case, a new mempool is created from the new node, and the
> PMD thread currently servicing the port will no longer do so, in favour
> of a thread from the new node (if enabled in the pmd-cpu-mask).
>
> To avail of this functionality, one must enable the
> CONFIG_RTE_LIBRTE_VHOST_NUMA DPDK configuration option.
>
> Signed-off-by: Ciara Loftus <ciara.lof...@intel.com>
> ---
>
> v2:
> - Remove numactl dependencies from travis & fedora spec files
> - Updated log message
>
>  .travis.yml                     |  1 +
>  INSTALL.DPDK.md                 |  8 ++++++--
>  NEWS                            |  3 +++
>  acinclude.m4                    |  2 +-
>  lib/netdev-dpdk.c               | 37 ++++++++++++++++++++++++++++++++++---
>  rhel/openvswitch-fedora.spec.in |  2 ++
>  6 files changed, 47 insertions(+), 6 deletions(-)
>
> diff --git a/.travis.yml b/.travis.yml
> index ee2cf21..6c818cb 100644
> --- a/.travis.yml
> +++ b/.travis.yml
> @@ -11,6 +11,7 @@ addons:
>      packages:
>        - bc
>        - gcc-multilib
> +      - libnuma-dev
>        - libssl-dev
>        - llvm-dev
>        - libjemalloc1
> diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md
> index c2e32bf..00e75bd 100644
> --- a/INSTALL.DPDK.md
> +++ b/INSTALL.DPDK.md
> @@ -16,7 +16,7 @@ OVS needs a system with 1GB hugepages support.
>  Building and Installing:
>  ------------------------
>
> -Required: DPDK 16.04
> +Required: DPDK 16.04, libnuma
>  Optional (if building with vhost-cuse): `fuse`, `fuse-devel` (`libfuse-dev`
>  on Debian/Ubuntu)
>
> @@ -465,7 +465,11 @@ Performance Tuning:
>
>     It is good practice to ensure that threads that are in the datapath are
>     pinned to cores in the same NUMA area. e.g. pmd threads and QEMU vCPUs
> -   responsible for forwarding.
> +   responsible for forwarding. If DPDK is built with
> +   CONFIG_RTE_LIBRTE_VHOST_NUMA=y, vHost User ports automatically
> +   detect the NUMA socket of the QEMU vCPUs and will be serviced by a PMD
> +   from the same node provided a core on this node is enabled in the
> +   pmd-cpu-mask.
>
>  9. Rx Mergeable buffers
>
> diff --git a/NEWS b/NEWS
> index ba201cf..fe24449 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -33,6 +33,9 @@ Post-v2.5.0
>         arguments. Additional arguments can be passed via the dpdk-extra
>         entry.
>       * Add ingress policing functionality.
> +     * PMD threads servicing vHost User ports can now come from the NUMA
> +       node that device memory is located on if CONFIG_RTE_LIBRTE_VHOST_NUMA
> +       is enabled in DPDK.
>     - ovs-benchmark: This utility has been removed due to lack of use and
>       bitrot.
>     - ovs-appctl:
> diff --git a/acinclude.m4 b/acinclude.m4
> index 0a14856..3978980 100644
> --- a/acinclude.m4
> +++ b/acinclude.m4
> @@ -219,7 +219,7 @@ AC_DEFUN([OVS_CHECK_DPDK], [
>      DPDKLIB_FOUND=false
>      save_LIBS=$LIBS
>      for extras in "" "-ldl"; do
> -        LIBS="$DPDK_LIB $extras $save_LIBS $DPDK_EXTRA_LIB"
> +        LIBS="$DPDK_LIB $extras $save_LIBS $DPDK_EXTRA_LIB -lnuma"
>          AC_LINK_IFELSE(
>             [AC_LANG_PROGRAM([#include <rte_config.h>
>                               #include <rte_eal.h>],
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 19d355f..fc683e9 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -30,6 +30,7 @@
>  #include <sys/types.h>
>  #include <sys/stat.h>
>  #include <getopt.h>
> +#include <numaif.h>
>
>  #include "dirs.h"
>  #include "dp-packet.h"
> @@ -385,6 +386,9 @@ struct netdev_dpdk {
>      int requested_n_txq;
>      int requested_n_rxq;
>
> +    /* Socket ID detected when vHost device is brought up */
> +    int requested_socket_id;
> +
>      /* Ingress Policer */
>      OVSRCU_TYPE(struct ingress_policer *) ingress_policer;
>      uint32_t policer_rate;
> @@ -761,6 +765,7 @@ netdev_dpdk_init(struct netdev *netdev, unsigned int port_no,
>      }
>
>      dev->socket_id = sid < 0 ? SOCKET0 : sid;
> +    dev->requested_socket_id = dev->socket_id;
>      dev->port_id = port_no;
>      dev->type = type;
>      dev->flags = 0;
> @@ -2344,6 +2349,8 @@ new_device(struct virtio_net *virtio_dev)
>  {
>      struct netdev_dpdk *dev;
>      bool exists = false;
> +    int newnode = 0;
> +    long err = 0;
>
>      ovs_mutex_lock(&dpdk_mutex);
>      /* Add device to the vhost port with the same name as that passed down. */
> @@ -2357,6 +2364,19 @@ new_device(struct virtio_net *virtio_dev)
>              }
>              ovsrcu_set(&dev->virtio_dev, virtio_dev);
>              exists = true;
> +
> +            /* Get NUMA information */
> +            err = get_mempolicy(&newnode, NULL, 0, virtio_dev,
> +                                MPOL_F_NODE | MPOL_F_ADDR);
> +            if (err) {
> +                VLOG_INFO("Error getting NUMA info for vHost Device '%s'",
> +                        virtio_dev->ifname);
> +                newnode = dev->socket_id;
> +            } else if (newnode != dev->socket_id) {
> +                dev->requested_socket_id = newnode;
> +                netdev_request_reconfigure(&dev->up);
> +            }
> +
>              virtio_dev->flags |= VIRTIO_DEV_RUNNING;
>              /* Disable notifications. */
>              set_irq_status(virtio_dev);
> @@ -2374,8 +2394,8 @@ new_device(struct virtio_net *virtio_dev)
>          return -1;
>      }
>
> -    VLOG_INFO("vHost Device '%s' %"PRIu64" has been added", virtio_dev->ifname,
> -              virtio_dev->device_fh);
> +    VLOG_INFO("vHost Device '%s' %"PRIu64" has been added on numa node %i",
> +              virtio_dev->ifname, virtio_dev->device_fh, newnode);
>      return 0;
>  }
>
> @@ -2937,6 +2957,7 @@ static int
>  netdev_dpdk_vhost_user_reconfigure(struct netdev *netdev)
>  {
>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> +    int err = 0;
>
>      ovs_mutex_lock(&dpdk_mutex);
>      ovs_mutex_lock(&dev->mutex);
> @@ -2944,10 +2965,20 @@ netdev_dpdk_vhost_user_reconfigure(struct netdev *netdev)
>      netdev->n_txq = dev->requested_n_txq;
>      netdev->n_rxq = dev->requested_n_rxq;
>
> +    if (dev->requested_socket_id != dev->socket_id) {
> +        dev->socket_id = dev->requested_socket_id;
> +        /* Change mempool to new NUMA Node */
> +        dpdk_mp_put(dev->dpdk_mp);
> +        dev->dpdk_mp = dpdk_mp_get(dev->socket_id, dev->mtu);
> +        if (!dev->dpdk_mp) {
> +            err = ENOMEM;
> +        }
> +    }
> +
>      ovs_mutex_unlock(&dev->mutex);
>      ovs_mutex_unlock(&dpdk_mutex);
>
> -    return 0;
> +    return err;
>  }
>
>  static int
> diff --git a/rhel/openvswitch-fedora.spec.in b/rhel/openvswitch-fedora.spec.in
> index 0759096..959c90a 100644
> --- a/rhel/openvswitch-fedora.spec.in
> +++ b/rhel/openvswitch-fedora.spec.in
> @@ -54,6 +54,8 @@ BuildRequires: libcap-ng libcap-ng-devel
>  %endif
>  %if %{with dpdk}
>  BuildRequires: dpdk-devel >= 2.2.0
> +BuildRequires: numactl-devel
> +Requires: numactl-libs
>  Provides: %{name}-dpdk = %{version}-%{release}
>  %endif
>
> --
> 2.4.3
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
>
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to