Hi,

A few comments inline...

On 04/21/2015 01:10 PM, Ciara Loftus wrote:
This patch adds support for a new port type to the userspace
datapath called dpdkvhostuser. It adds to the existing
infrastructure of vhost-cuse, however disables vhost-cuse ports
as the default port type, in favour of vhost-user ports. Refer
to the documentation for enabling vhost-cuse ports if desired.

A new dpdkvhostuser port will create a unix domain socket which
when provided to QEMU is used to facilitate communication between
the virtio-net device on the VM and the OVS port on the host.

Signed-off-by: Ciara Loftus <ciara.lof...@intel.com>
---
  INSTALL.DPDK.md         | 128 +++++++++++++++++++++++++++++++++++++++---------
  acinclude.m4            |  13 +++++
  configure.ac            |   1 +
  lib/netdev-dpdk.c       |  45 ++++++++++++++++-
  lib/netdev.c            |   4 ++
  vswitchd/ovs-vswitchd.c |   6 ++-
  6 files changed, 169 insertions(+), 28 deletions(-)

diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md
index aae97a5..8bd11f3 100644
--- a/INSTALL.DPDK.md
+++ b/INSTALL.DPDK.md
[...]
  Following the steps above to create a bridge, you can now add DPDK vhost
-as a port to the vswitch.
+as a port to the vswitch. Unlike DPDK ring ports, DPDK vhost ports can have
+arbitrary names.
+
+When adding vhost ports to the switch, take care depending on which
+type of vhost you are using.
+
+  -  For vhost-user (default), the name of the port type is `dpdkvhostuser`
+
+      `ovs-ofctl add-port br0 vhost-user1 -- set Interface vhost-user1 
type=dpdkvhostuser`
                                 ^^^^^^^^^^^

[...]
+DPDK vhost-user VM configuration:
+---------------------------------
+Follow the steps below to attach vhost-user port(s) to a VM.
+
+1. Configure sockets.
+   Pass the following parameters to QEMU to attach a vhost-user device:
+
+   ```
+   -chardev socket,id=char0,path=/tmp/vhost-user-1
                                         ^^^^^^^^^^^^
+   -netdev type=vhost-user,id=mynet1,chardev=char0,vhostforce
+   -device virtio-net-pci,mac=00:00:00:00:00:01,netdev=mynet1
+   ```

-DPDK vhost VM configuration:
-----------------------------
+   ...where vhost-user-1 is the name of the vhost-user port added
               ^^^^^^^^^^^^

The first example uses "vhost-user1" whereas the latter ones use "vhost-user-1". Please fix the examples to use a common name so that copy-paste gives a working setup.



diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index f69154b..deb8b83 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -101,8 +101,13 @@ BUILD_ASSERT_DECL((MAX_NB_MBUF / 
ROUND_DOWN_POW2(MAX_NB_MBUF/MIN_NB_MBUF))

  #define MAX_PKT_BURST 32           /* Max burst size for RX/TX */

+#ifdef VHOST_CUSE
  /* Character device cuse_dev_name. */
  char *cuse_dev_name = NULL;
+#else
+#define VHOST_USER_PORT_SOCK_PATH "/tmp/%s" /* Socket Location Template */

Using /tmp for these seems like asking for trouble to me, how about somewhere /var/run? At least it should be configurable via cli, similar to the cuse device name.

+#define VHOST_USER 1
+#endif

  static const struct rte_eth_conf port_conf = {
      .rxmode = {
@@ -231,6 +236,11 @@ struct netdev_dpdk {
      /* virtio-net structure for vhost device */
      OVSRCU_TYPE(struct virtio_net *) virtio_dev;

+#ifdef VHOST_USER
+    /* socket location for vhost-user device */
+    char socket_path[IF_NAME_SZ];
+#endif
+
      /* In dpdk_list. */
      struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex);
      rte_spinlock_t txq_lock;
@@ -245,6 +255,8 @@ static bool thread_is_pmd(void);

  static int netdev_dpdk_construct(struct netdev *);

+static void *start_vhost_loop(void *dummy);
+
  struct virtio_net * netdev_dpdk_get_virtio(const struct netdev_dpdk *dev);

  static bool
@@ -557,6 +569,20 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int 
port_no,
      netdev_->n_txq = NR_QUEUE;
      netdev_->n_rxq = NR_QUEUE;

+#ifdef VHOST_USER
+    if (type == DPDK_DEV_VHOST) {
+        snprintf(netdev->socket_path, sizeof(netdev->socket_path), 
VHOST_USER_PORT_SOCK_PATH, netdev_->name);
+        err = rte_vhost_driver_register(netdev->socket_path);
+        if (err != 0) {
+            VLOG_ERR("vhost-user socket device setup failure for socket %s\n",
+                     netdev->socket_path);
+            goto unlock;
+        }
+
+        VLOG_INFO("Socket %s created for vhost-user port %s\n", 
netdev->socket_path, netdev_->name);
+    }
+#endif
+
      if (type == DPDK_DEV_ETH) {
            netdev_dpdk_alloc_txq(netdev, NR_QUEUE);
            err = dpdk_eth_dev_init(netdev);
@@ -1536,7 +1562,11 @@ new_device(struct virtio_net *dev)
      ovs_mutex_lock(&dpdk_mutex);
      /* Add device to the vhost port with the same name as that passed down. */
      LIST_FOR_EACH(netdev, list_node, &dpdk_list) {
+#ifdef VHOST_CUSE
          if (strncmp(dev->ifname, netdev->up.name, IFNAMSIZ) == 0) {
+#else
+        if (strncmp(dev->ifname, netdev->socket_path, IF_NAME_SZ) == 0) {
+#endif
              ovs_mutex_lock(&netdev->mutex);
              ovsrcu_set(&netdev->virtio_dev, dev);
              ovs_mutex_unlock(&netdev->mutex);
@@ -1616,7 +1646,7 @@ const struct virtio_net_device_ops virtio_net_device_ops =
  };

  static void *
-start_cuse_session_loop(void *dummy OVS_UNUSED)
+start_vhost_loop(void *dummy OVS_UNUSED)
  {
       pthread_detach(pthread_self());
       /* Put the cuse thread into quiescent state. */
@@ -1628,10 +1658,13 @@ start_cuse_session_loop(void *dummy OVS_UNUSED)
  static int
  dpdk_vhost_class_init(void)
  {
+#ifdef VHOST_CUSE
      int err = -1;
+#endif

      rte_vhost_driver_callback_register(&virtio_net_device_ops);

+#ifdef VHOST_CUSE
      /* Register CUSE device to handle IOCTLs.
       * Unless otherwise specified on the vswitchd command line, cuse_dev_name
       * is set to vhost-net.
@@ -1642,8 +1675,9 @@ dpdk_vhost_class_init(void)
          VLOG_ERR("CUSE device setup failure.");
          return -1;
      }
+#endif

-    ovs_thread_create("cuse_thread", start_cuse_session_loop, NULL);
+    ovs_thread_create("cuse_thread", start_vhost_loop, NULL);
      return 0;
  }

@@ -1864,6 +1898,8 @@ dpdk_init(int argc, char **argv)
       * this string if it meets the correct criteria. Otherwise, set it to the
       * default (vhost-net).
       */
+
+#ifdef VHOST_CUSE
      if (!strcmp(argv[1], "--cuse_dev_name") &&
          (strlen(argv[2]) <= NAME_MAX)) {

@@ -1882,6 +1918,7 @@ dpdk_init(int argc, char **argv)
          cuse_dev_name = "vhost-net";
          VLOG_INFO("No cuse_dev_name provided - defaulting to /dev/vhost-net");
      }
+#endif

      /* Keep the program name argument as this is needed for call to
       * rte_eal_init()
@@ -1937,7 +1974,11 @@ const struct netdev_class dpdk_ring_class =

  const struct netdev_class dpdk_vhost_class =
      NETDEV_DPDK_CLASS(
+#ifdef VHOST_CUSE
          "dpdkvhost",
+#else
+        "dpdkvhostuser",
+#endif
          dpdk_vhost_class_init,
          netdev_dpdk_vhost_construct,
          netdev_dpdk_vhost_destruct,
diff --git a/lib/netdev.c b/lib/netdev.c
index 45f7f29..77513fa 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -111,7 +111,11 @@ netdev_is_pmd(const struct netdev *netdev)
  {
      return (!strcmp(netdev->netdev_class->type, "dpdk") ||
              !strcmp(netdev->netdev_class->type, "dpdkr") ||
+#ifdef VHOST_CUSE
              !strcmp(netdev->netdev_class->type, "dpdkvhost"));
+#else
+            !strcmp(netdev->netdev_class->type, "dpdkvhostuser"));
+#endif
  }

  static void
diff --git a/vswitchd/ovs-vswitchd.c b/vswitchd/ovs-vswitchd.c
index a1b33da..57e6224 100644
--- a/vswitchd/ovs-vswitchd.c
+++ b/vswitchd/ovs-vswitchd.c
@@ -252,9 +252,11 @@ usage(void)
      daemon_usage();
      vlog_usage();
      printf("\nDPDK options:\n"
-           "  --dpdk options            Initialize DPDK datapath.\n"
-           "  --cuse_dev_name BASENAME  override default character device 
name\n"
+           "  --dpdk options            Initialize DPDK datapath.\n");
+#ifdef VHOST_CUSE
+    printf("  --cuse_dev_name BASENAME  override default character device 
name\n"
             "                            for use with userspace vHost.\n");
+#endif
      printf("\nOther options:\n"
             "  --unixctl=SOCKET          override default control socket 
name\n"
             "  -h, --help                display this help message\n"


That's a whole lot of ifdef's for something that has a common API for both on the DPDK side. Please do correct me if I'm wrong, but I was under the impression both vhost-cuse and vhost-user expect to operate by having one central node (device/socket) to which qemu connects to initially, and is then given process-specific fd's for further operation.

Here a separate socket is created for each vhost-user port, which makes the code between vhost-cuse and -user differ more than it really needs to, AFAICS. I can see its done this way to permit free(r) naming of the ports: in vhost-cuse case the interface name can be communicated through -netdev tap,ifname=<xxx> but -netdev type=vhost-user doesn't grok ifname so something else needs to be used.

Just wondering if there's some other way around it that would avoid having to make the otherwise two similar cases so different in OVS codebase.

The good news is that it does work and is far, far less picky about host/guest kernel versions and such.

        - Panu -
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to