Hi,
A few comments inline...
On 04/21/2015 01:10 PM, Ciara Loftus wrote:
This patch adds support for a new port type to the userspace
datapath called dpdkvhostuser. It adds to the existing
infrastructure of vhost-cuse, however disables vhost-cuse ports
as the default port type, in favour of vhost-user ports. Refer
to the documentation for enabling vhost-cuse ports if desired.
A new dpdkvhostuser port will create a unix domain socket which
when provided to QEMU is used to facilitate communication between
the virtio-net device on the VM and the OVS port on the host.
Signed-off-by: Ciara Loftus <ciara.lof...@intel.com>
---
INSTALL.DPDK.md | 128 +++++++++++++++++++++++++++++++++++++++---------
acinclude.m4 | 13 +++++
configure.ac | 1 +
lib/netdev-dpdk.c | 45 ++++++++++++++++-
lib/netdev.c | 4 ++
vswitchd/ovs-vswitchd.c | 6 ++-
6 files changed, 169 insertions(+), 28 deletions(-)
diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md
index aae97a5..8bd11f3 100644
--- a/INSTALL.DPDK.md
+++ b/INSTALL.DPDK.md
[...]
Following the steps above to create a bridge, you can now add DPDK vhost
-as a port to the vswitch.
+as a port to the vswitch. Unlike DPDK ring ports, DPDK vhost ports can have
+arbitrary names.
+
+When adding vhost ports to the switch, take care depending on which
+type of vhost you are using.
+
+ - For vhost-user (default), the name of the port type is `dpdkvhostuser`
+
+ `ovs-ofctl add-port br0 vhost-user1 -- set Interface vhost-user1
type=dpdkvhostuser`
^^^^^^^^^^^
[...]
+DPDK vhost-user VM configuration:
+---------------------------------
+Follow the steps below to attach vhost-user port(s) to a VM.
+
+1. Configure sockets.
+ Pass the following parameters to QEMU to attach a vhost-user device:
+
+ ```
+ -chardev socket,id=char0,path=/tmp/vhost-user-1
^^^^^^^^^^^^
+ -netdev type=vhost-user,id=mynet1,chardev=char0,vhostforce
+ -device virtio-net-pci,mac=00:00:00:00:00:01,netdev=mynet1
+ ```
-DPDK vhost VM configuration:
-----------------------------
+ ...where vhost-user-1 is the name of the vhost-user port added
^^^^^^^^^^^^
The first example uses "vhost-user1" whereas the latter ones use
"vhost-user-1". Please fix the examples to use a common name so that
copy-paste gives a working setup.
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index f69154b..deb8b83 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -101,8 +101,13 @@ BUILD_ASSERT_DECL((MAX_NB_MBUF /
ROUND_DOWN_POW2(MAX_NB_MBUF/MIN_NB_MBUF))
#define MAX_PKT_BURST 32 /* Max burst size for RX/TX */
+#ifdef VHOST_CUSE
/* Character device cuse_dev_name. */
char *cuse_dev_name = NULL;
+#else
+#define VHOST_USER_PORT_SOCK_PATH "/tmp/%s" /* Socket Location Template */
Using /tmp for these seems like asking for trouble to me, how about
somewhere /var/run? At least it should be configurable via cli, similar
to the cuse device name.
+#define VHOST_USER 1
+#endif
static const struct rte_eth_conf port_conf = {
.rxmode = {
@@ -231,6 +236,11 @@ struct netdev_dpdk {
/* virtio-net structure for vhost device */
OVSRCU_TYPE(struct virtio_net *) virtio_dev;
+#ifdef VHOST_USER
+ /* socket location for vhost-user device */
+ char socket_path[IF_NAME_SZ];
+#endif
+
/* In dpdk_list. */
struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex);
rte_spinlock_t txq_lock;
@@ -245,6 +255,8 @@ static bool thread_is_pmd(void);
static int netdev_dpdk_construct(struct netdev *);
+static void *start_vhost_loop(void *dummy);
+
struct virtio_net * netdev_dpdk_get_virtio(const struct netdev_dpdk *dev);
static bool
@@ -557,6 +569,20 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int
port_no,
netdev_->n_txq = NR_QUEUE;
netdev_->n_rxq = NR_QUEUE;
+#ifdef VHOST_USER
+ if (type == DPDK_DEV_VHOST) {
+ snprintf(netdev->socket_path, sizeof(netdev->socket_path),
VHOST_USER_PORT_SOCK_PATH, netdev_->name);
+ err = rte_vhost_driver_register(netdev->socket_path);
+ if (err != 0) {
+ VLOG_ERR("vhost-user socket device setup failure for socket %s\n",
+ netdev->socket_path);
+ goto unlock;
+ }
+
+ VLOG_INFO("Socket %s created for vhost-user port %s\n",
netdev->socket_path, netdev_->name);
+ }
+#endif
+
if (type == DPDK_DEV_ETH) {
netdev_dpdk_alloc_txq(netdev, NR_QUEUE);
err = dpdk_eth_dev_init(netdev);
@@ -1536,7 +1562,11 @@ new_device(struct virtio_net *dev)
ovs_mutex_lock(&dpdk_mutex);
/* Add device to the vhost port with the same name as that passed down. */
LIST_FOR_EACH(netdev, list_node, &dpdk_list) {
+#ifdef VHOST_CUSE
if (strncmp(dev->ifname, netdev->up.name, IFNAMSIZ) == 0) {
+#else
+ if (strncmp(dev->ifname, netdev->socket_path, IF_NAME_SZ) == 0) {
+#endif
ovs_mutex_lock(&netdev->mutex);
ovsrcu_set(&netdev->virtio_dev, dev);
ovs_mutex_unlock(&netdev->mutex);
@@ -1616,7 +1646,7 @@ const struct virtio_net_device_ops virtio_net_device_ops =
};
static void *
-start_cuse_session_loop(void *dummy OVS_UNUSED)
+start_vhost_loop(void *dummy OVS_UNUSED)
{
pthread_detach(pthread_self());
/* Put the cuse thread into quiescent state. */
@@ -1628,10 +1658,13 @@ start_cuse_session_loop(void *dummy OVS_UNUSED)
static int
dpdk_vhost_class_init(void)
{
+#ifdef VHOST_CUSE
int err = -1;
+#endif
rte_vhost_driver_callback_register(&virtio_net_device_ops);
+#ifdef VHOST_CUSE
/* Register CUSE device to handle IOCTLs.
* Unless otherwise specified on the vswitchd command line, cuse_dev_name
* is set to vhost-net.
@@ -1642,8 +1675,9 @@ dpdk_vhost_class_init(void)
VLOG_ERR("CUSE device setup failure.");
return -1;
}
+#endif
- ovs_thread_create("cuse_thread", start_cuse_session_loop, NULL);
+ ovs_thread_create("cuse_thread", start_vhost_loop, NULL);
return 0;
}
@@ -1864,6 +1898,8 @@ dpdk_init(int argc, char **argv)
* this string if it meets the correct criteria. Otherwise, set it to the
* default (vhost-net).
*/
+
+#ifdef VHOST_CUSE
if (!strcmp(argv[1], "--cuse_dev_name") &&
(strlen(argv[2]) <= NAME_MAX)) {
@@ -1882,6 +1918,7 @@ dpdk_init(int argc, char **argv)
cuse_dev_name = "vhost-net";
VLOG_INFO("No cuse_dev_name provided - defaulting to /dev/vhost-net");
}
+#endif
/* Keep the program name argument as this is needed for call to
* rte_eal_init()
@@ -1937,7 +1974,11 @@ const struct netdev_class dpdk_ring_class =
const struct netdev_class dpdk_vhost_class =
NETDEV_DPDK_CLASS(
+#ifdef VHOST_CUSE
"dpdkvhost",
+#else
+ "dpdkvhostuser",
+#endif
dpdk_vhost_class_init,
netdev_dpdk_vhost_construct,
netdev_dpdk_vhost_destruct,
diff --git a/lib/netdev.c b/lib/netdev.c
index 45f7f29..77513fa 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -111,7 +111,11 @@ netdev_is_pmd(const struct netdev *netdev)
{
return (!strcmp(netdev->netdev_class->type, "dpdk") ||
!strcmp(netdev->netdev_class->type, "dpdkr") ||
+#ifdef VHOST_CUSE
!strcmp(netdev->netdev_class->type, "dpdkvhost"));
+#else
+ !strcmp(netdev->netdev_class->type, "dpdkvhostuser"));
+#endif
}
static void
diff --git a/vswitchd/ovs-vswitchd.c b/vswitchd/ovs-vswitchd.c
index a1b33da..57e6224 100644
--- a/vswitchd/ovs-vswitchd.c
+++ b/vswitchd/ovs-vswitchd.c
@@ -252,9 +252,11 @@ usage(void)
daemon_usage();
vlog_usage();
printf("\nDPDK options:\n"
- " --dpdk options Initialize DPDK datapath.\n"
- " --cuse_dev_name BASENAME override default character device
name\n"
+ " --dpdk options Initialize DPDK datapath.\n");
+#ifdef VHOST_CUSE
+ printf(" --cuse_dev_name BASENAME override default character device
name\n"
" for use with userspace vHost.\n");
+#endif
printf("\nOther options:\n"
" --unixctl=SOCKET override default control socket
name\n"
" -h, --help display this help message\n"
That's a whole lot of ifdef's for something that has a common API for
both on the DPDK side. Please do correct me if I'm wrong, but I was
under the impression both vhost-cuse and vhost-user expect to operate by
having one central node (device/socket) to which qemu connects to
initially, and is then given process-specific fd's for further operation.
Here a separate socket is created for each vhost-user port, which makes
the code between vhost-cuse and -user differ more than it really needs
to, AFAICS. I can see its done this way to permit free(r) naming of the
ports: in vhost-cuse case the interface name can be communicated through
-netdev tap,ifname=<xxx> but -netdev type=vhost-user doesn't grok ifname
so something else needs to be used.
Just wondering if there's some other way around it that would avoid
having to make the otherwise two similar cases so different in OVS codebase.
The good news is that it does work and is far, far less picky about
host/guest kernel versions and such.
- Panu -
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev