Thanks for the patch. I tried it, and it makes it possible to restart vswitchd and qemu.
I believe that now vhost_server_id and vhost_client_id are not constant for the lifetime of the struct and must be protected with dev->mutex. The following incremental on top of your patch does that and removes extra parentheses from the sizeof operator: - /* Identifiers used to distinguish vhost devices from each other. They do - * not change during the lifetime of a struct netdev_dpdk. They can be read - * without holding any mutex. */ - const char vhost_server_id[PATH_MAX]; - const char vhost_client_id[PATH_MAX]; + /* Identifiers used to distinguish vhost devices from each other. */ + char vhost_server_id[PATH_MAX]; + char vhost_client_id[PATH_MAX]; /* In dpdk_list. */ struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex); @@ -837,6 +835,7 @@ dpdk_dev_parse_name(const char dev_name[], const char prefix[], * use */ static const char * get_vhost_id(struct netdev_dpdk *dev) + OVS_REQUIRES(dev->mutex) { return dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT ? dev->vhost_client_id : dev->vhost_server_id; @@ -867,20 +866,20 @@ netdev_dpdk_vhost_construct(struct netdev *netdev) /* Take the name of the vhost-user port and append it to the location where * the socket is to be created, then register the socket. 
*/ - snprintf(CONST_CAST(char *, dev->vhost_server_id), - sizeof(dev->vhost_server_id), "%s/%s", vhost_sock_dir, name); + snprintf(dev->vhost_server_id, sizeof dev->vhost_server_id, "%s/%s", + vhost_sock_dir, name); - err = rte_vhost_driver_register(get_vhost_id(dev), + err = rte_vhost_driver_register(dev->vhost_server_id, dev->vhost_driver_flags); if (err) { VLOG_ERR("vhost-user socket device setup failure for socket %s\n", - get_vhost_id(dev)); + dev->vhost_server_id); } else { if (!(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT)) { /* OVS server mode - add this socket to list for deletion */ - fatal_signal_add_file_to_unlink(get_vhost_id(dev)); + fatal_signal_add_file_to_unlink(dev->vhost_server_id); VLOG_INFO("Socket %s created for vhost-user port %s\n", - get_vhost_id(dev), name); + dev->vhost_server_id, name); } err = netdev_dpdk_init(netdev, -1, DPDK_DEV_VHOST); } @@ -935,17 +934,19 @@ netdev_dpdk_destruct(struct netdev *netdev) * try to acquire 'dpdk_mutex' and possibly 'dev->mutex'. To avoid a * deadlock, none of the mutexes must be held while calling this function. 
*/ static int -dpdk_vhost_driver_unregister(struct netdev_dpdk *dev) +dpdk_vhost_driver_unregister(struct netdev_dpdk *dev OVS_UNUSED, + const char *vhost_id) OVS_EXCLUDED(dpdk_mutex) OVS_EXCLUDED(dev->mutex) { - return rte_vhost_driver_unregister(get_vhost_id(dev)); + return rte_vhost_driver_unregister(vhost_id); } static void netdev_dpdk_vhost_destruct(struct netdev *netdev) { struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); + char *vhost_id; ovs_mutex_lock(&dpdk_mutex); ovs_mutex_lock(&dev->mutex); @@ -967,15 +968,18 @@ netdev_dpdk_vhost_destruct(struct netdev *netdev) ovs_list_remove(&dev->list_node); dpdk_mp_put(dev->dpdk_mp); + vhost_id = xstrdup(get_vhost_id(dev)); + ovs_mutex_unlock(&dev->mutex); ovs_mutex_unlock(&dpdk_mutex); - if (dpdk_vhost_driver_unregister(dev)) { - VLOG_ERR("Unable to remove vhost-user socket %s", get_vhost_id(dev)); + if (dpdk_vhost_driver_unregister(dev, vhost_id)) { + VLOG_ERR("Unable to remove vhost-user socket %s", vhost_id); } else if (!(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT)) { /* OVS server mode - remove this socket from list for deletion */ - fatal_signal_remove_file_to_unlink(get_vhost_id(dev)); + fatal_signal_remove_file_to_unlink(vhost_id); } + free(vhost_id); } static void @@ -2297,10 +2301,10 @@ new_device(int vid) ovs_mutex_lock(&dpdk_mutex); /* Add device to the vhost port with the same name as that passed down. 
*/ LIST_FOR_EACH(dev, list_node, &dpdk_list) { + ovs_mutex_lock(&dev->mutex); if (strncmp(ifname, get_vhost_id(dev), IF_NAME_SZ) == 0) { uint32_t qp_num = rte_vhost_get_queue_num(vid); - ovs_mutex_lock(&dev->mutex); /* Get NUMA information */ newnode = rte_vhost_get_numa_node(vid); if (newnode == -1) { @@ -2330,6 +2334,7 @@ new_device(int vid) ovs_mutex_unlock(&dev->mutex); break; } + ovs_mutex_unlock(&dev->mutex); } ovs_mutex_unlock(&dpdk_mutex); @@ -2423,8 +2428,8 @@ vring_state_changed(int vid, uint16_t queue_id, int enable) ovs_mutex_lock(&dpdk_mutex); LIST_FOR_EACH (dev, list_node, &dpdk_list) { + ovs_mutex_lock(&dev->mutex); if (strncmp(ifname, get_vhost_id(dev), IF_NAME_SZ) == 0) { - ovs_mutex_lock(&dev->mutex); if (enable) { dev->tx_q[qid].map = qid; } else { @@ -2435,6 +2440,7 @@ vring_state_changed(int vid, uint16_t queue_id, int enable) ovs_mutex_unlock(&dev->mutex); break; } + ovs_mutex_unlock(&dev->mutex); } ovs_mutex_unlock(&dpdk_mutex); @@ -2950,12 +2956,14 @@ netdev_dpdk_vhost_reconfigure(struct netdev *netdev) */ if (!(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT) && !(netdev_dpdk_get_vid(dev) >= 0) - && dev->requested_vhost_client_path && strlen(dev->requested_vhost_client_path)) { /* Unregister server-mode device */ - ovs_mutex_unlock(&dpdk_mutex); + char *vhost_id = xstrdup(get_vhost_id(dev)); + ovs_mutex_unlock(&dev->mutex); - err = dpdk_vhost_driver_unregister(dev); + ovs_mutex_unlock(&dpdk_mutex); + err = dpdk_vhost_driver_unregister(dev, vhost_id); + free(vhost_id); ovs_mutex_lock(&dpdk_mutex); ovs_mutex_lock(&dev->mutex); if (err) { @@ -2964,8 +2972,7 @@ netdev_dpdk_vhost_reconfigure(struct netdev *netdev) } else { fatal_signal_remove_file_to_unlink(get_vhost_id(dev)); /* Create the new vhost_id using path specified */ - snprintf(CONST_CAST(char *, dev->vhost_client_id), - sizeof(dev->vhost_client_id), + snprintf(dev->vhost_client_id, sizeof dev->vhost_client_id, "%s/%s", dev->requested_vhost_client_path, dev-> up.name); /* Register 
client-mode device */ err = rte_vhost_driver_register(dev->vhost_client_id, There was a misunderstanding about the interface. I intended vhost-server-path to include the full path to the socket, not just the directory. This is more inline with what qemu does. I believe there's also no need for the intermediate 'requested_vhost_client_path' if we use the full path. If you agree with the comments, would you mind sending another version (I hope to be able to apply that)? I still believe we could adjust the user interface after branching, before release. Thanks, Daniele 2016-08-11 9:28 GMT-07:00 Ciara Loftus <ciara.lof...@intel.com>: > Until now, vHost ports in OVS have only been able to operate in 'server' > mode whereby OVS creates and manages the vHost socket and essentially > acts as the vHost 'server'. With this commit a new mode, 'client' mode, > is available. In this mode, OVS acts as the vHost 'client' and connects > to the socket created and managed by QEMU which now acts as the vHost > 'server'. This mode allows for reconnect capability, which allows a > vHost port to resume normal connectivity in event of switch reset. > > By default dpdkvhostuser ports still operate in 'server' mode. That is > unless a valid 'vhost-server-path' is specified for that device like so: > > ovs-vsctl set Interface <vhostportname> > options:vhost-server-path=<path_to_socket_dir> > > Once specified, the port stays in 'client' mode for the remainder of its > lifetime. > > QEMU v2.7.0+ is required when using OVS in vHost client mode and QEMU in > vHost server mode. > > Signed-off-by: Ciara Loftus <ciara.lof...@intel.com> > --- > v4: > - Rebase > - Remove vhost-driver-mode and allow per-interface flag. > - Use 'vhost-server-path' option to enable client mode for the given > port and also to set the path for the client port. 
> > v3: > - Only restrict vhost_sock_dir if server mode > > v2 > - Updated comments in vhost construct & destruct > - Add check for server-mode before printing error when destruct is called > on a running VM > - Fixed coding style/standards issues > - Use strcmp instead of strncmp when processing 'vhost-driver-mode' > --- > INSTALL.DPDK-ADVANCED.md | 34 +++++++++++++ > NEWS | 1 + > lib/netdev-dpdk.c | 130 ++++++++++++++++++++++++++++++ > +++++++++-------- > vswitchd/vswitch.xml | 10 ++++ > 4 files changed, 154 insertions(+), 21 deletions(-) > > diff --git a/INSTALL.DPDK-ADVANCED.md b/INSTALL.DPDK-ADVANCED.md > index 8d6cabc..6f03533 100755 > --- a/INSTALL.DPDK-ADVANCED.md > +++ b/INSTALL.DPDK-ADVANCED.md > @@ -568,6 +568,40 @@ For users wanting to do packet forwarding using > kernel stack below are the steps > where `-L`: Changes the numbers of channels of the specified > network device > and `combined`: Changes the number of multi-purpose channels. > > + 4. OVS vHost client-mode & vHost reconnect (OPTIONAL) > + > + By default, OVS DPDK acts as the vHost socket server for > dpdkvhostuser > + ports and QEMU acts as the vHost client. This means OVS creates and > + manages the vHost socket and QEMU is the client which connects to > the > + vHost server (OVS). In QEMU v2.7 the option is available for QEMU > to > + act as the vHost server meaning the roles can be reversed and OVS > can > + become the vHost client. To enable client mode for a given > + dpdkvhostuserport, one must specify a valid 'vhost-server-path' > like so: > + > + ``` > + ovs-vsctl set Interface <vhostportname> options:vhost-server-path=< > dir> > + ``` > + > + Setting this value automatically switches the port to client mode > (from > + OVS' perspective). > + 'vhost-server-path' reflects the location the vHost socket > of the vHost socket > + <vhostportname> resides in, or will reside in once QEMU is > launched. > + The port remains in 'client' mode for the remainder of it's > lifetime ie. 
> + it cannot be reverted back to server mode. > + > + One must append ',server' to the 'chardev' arguments on the QEMU > command > + line, to instruct QEMU to use vHost server mode for a given > interface, > + like so: > + > + ```` > + -chardev socket,id=char0,path=<dir>/<vhostportname>,server > + ```` > + > + One benefit of using this mode is the ability for vHost ports to > + 'reconnect' in event of the switch crashing or being brought down. > Once > + it is brought back up, the vHost ports will reconnect > automatically and > + normal service will resume. > + > - VM Configuration with libvirt > > * change the user/group, access control policty and restart libvirtd. > diff --git a/NEWS b/NEWS > index 9f09e1c..99412ba 100644 > --- a/NEWS > +++ b/NEWS > @@ -70,6 +70,7 @@ Post-v2.5.0 > fragmentation or NAT support yet) > * Support for DPDK 16.07 > * Remove dpdkvhostcuse port type. > + * OVS client mode for vHost and vHost reconnect (Requires QEMU 2.7) > - Increase number of registers to 16. > - ovs-benchmark: This utility has been removed due to lack of use and > bitrot. > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index 4e4c74e..e480ce8 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -352,10 +352,11 @@ struct netdev_dpdk { > /* True if vHost device is 'up' and has been reconfigured at least > once */ > bool vhost_reconfigured; > > - /* Identifier used to distinguish vhost devices from each other. It > does > - * not change during the lifetime of a struct netdev_dpdk. It can be > read > + /* Identifiers used to distinguish vhost devices from each other. > They do > + * not change during the lifetime of a struct netdev_dpdk. They can > be read > * without holding any mutex. */ > - const char vhost_id[PATH_MAX]; > + const char vhost_server_id[PATH_MAX]; > + const char vhost_client_id[PATH_MAX]; > > /* In dpdk_list. 
*/ > struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex); > @@ -373,6 +374,12 @@ struct netdev_dpdk { > /* Socket ID detected when vHost device is brought up */ > int requested_socket_id; > > + /* Directory where vHost client socket resides */ > + char requested_vhost_client_path[PATH_MAX]; > + > + /* Denotes whether vHost port is client/server mode */ > + uint64_t vhost_driver_flags; > + > /* Ingress Policer */ > OVSRCU_TYPE(struct ingress_policer *) ingress_policer; > uint32_t policer_rate; > @@ -760,6 +767,8 @@ netdev_dpdk_init(struct netdev *netdev, unsigned int > port_no, > dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu); > ovsrcu_index_init(&dev->vid, -1); > dev->vhost_reconfigured = false; > + /* initialise vHost port in server mode */ > + dev->vhost_driver_flags &= ~RTE_VHOST_USER_CLIENT; > > buf_size = dpdk_buf_size(dev->mtu); > dev->dpdk_mp = dpdk_mp_get(dev->socket_id, > FRAME_LEN_TO_MTU(buf_size)); > @@ -824,13 +833,21 @@ dpdk_dev_parse_name(const char dev_name[], const > char prefix[], > } > } > > +/* Returns a pointer to the relevant vHost socket ID depending on the > mode in > + * use */ > +static const char * > +get_vhost_id(struct netdev_dpdk *dev) > +{ > + return dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT ? > + dev->vhost_client_id : dev->vhost_server_id; > +} > + > static int > netdev_dpdk_vhost_construct(struct netdev *netdev) > { > struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); > const char *name = netdev->name; > int err; > - uint64_t flags = 0; > > /* 'name' is appended to 'vhost_sock_dir' and used to create a socket > in > * the file system. '/' or '\' would traverse directories, so they're > not > @@ -850,17 +867,21 @@ netdev_dpdk_vhost_construct(struct netdev *netdev) > /* Take the name of the vhost-user port and append it to the location > where > * the socket is to be created, then register the socket. 
> */ > - snprintf(CONST_CAST(char *, dev->vhost_id), sizeof dev->vhost_id, > "%s/%s", > - vhost_sock_dir, name); > + snprintf(CONST_CAST(char *, dev->vhost_server_id), > + sizeof(dev->vhost_server_id), "%s/%s", vhost_sock_dir, name); > > - err = rte_vhost_driver_register(dev->vhost_id, flags); > + err = rte_vhost_driver_register(get_vhost_id(dev), > + dev->vhost_driver_flags); > if (err) { > VLOG_ERR("vhost-user socket device setup failure for socket %s\n", > - dev->vhost_id); > + get_vhost_id(dev)); > } else { > - fatal_signal_add_file_to_unlink(dev->vhost_id); > - VLOG_INFO("Socket %s created for vhost-user port %s\n", > - dev->vhost_id, name); > + if (!(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT)) { > + /* OVS server mode - add this socket to list for deletion */ > + fatal_signal_add_file_to_unlink(get_vhost_id(dev)); > + VLOG_INFO("Socket %s created for vhost-user port %s\n", > + get_vhost_id(dev), name); > + } > err = netdev_dpdk_init(netdev, -1, DPDK_DEV_VHOST); > } > > @@ -918,7 +939,7 @@ dpdk_vhost_driver_unregister(struct netdev_dpdk *dev) > OVS_EXCLUDED(dpdk_mutex) > OVS_EXCLUDED(dev->mutex) > { > - return rte_vhost_driver_unregister(dev->vhost_id); > + return rte_vhost_driver_unregister(get_vhost_id(dev)); > } > > static void > @@ -930,12 +951,13 @@ netdev_dpdk_vhost_destruct(struct netdev *netdev) > ovs_mutex_lock(&dev->mutex); > > /* Guest becomes an orphan if still attached. 
*/ > - if (netdev_dpdk_get_vid(dev) >= 0) { > + if (netdev_dpdk_get_vid(dev) >= 0 > + && !(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT)) { > VLOG_ERR("Removing port '%s' while vhost device still attached.", > netdev->name); > VLOG_ERR("To restore connectivity after re-adding of port, VM on > socket" > " '%s' must be restarted.", > - dev->vhost_id); > + get_vhost_id(dev)); > } > > free(ovsrcu_get_protected(struct ingress_policer *, > @@ -949,9 +971,10 @@ netdev_dpdk_vhost_destruct(struct netdev *netdev) > ovs_mutex_unlock(&dpdk_mutex); > > if (dpdk_vhost_driver_unregister(dev)) { > - VLOG_ERR("Unable to remove vhost-user socket %s", dev->vhost_id); > - } else { > - fatal_signal_remove_file_to_unlink(dev->vhost_id); > + VLOG_ERR("Unable to remove vhost-user socket %s", > get_vhost_id(dev)); > + } else if (!(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT)) { > + /* OVS server mode - remove this socket from list for deletion */ > + fatal_signal_remove_file_to_unlink(get_vhost_id(dev)); > } > } > > @@ -1013,6 +1036,30 @@ netdev_dpdk_set_config(struct netdev *netdev, const > struct smap *args) > } > > static int > +netdev_dpdk_vhost_set_config(struct netdev *netdev, const struct smap > *args) > +{ > + struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); > + const char *path; > + struct stat s; > + > + if (!(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT)) { > + path = smap_get(args, "vhost-server-path"); > + /* Request reconfigure if 'path': > + * 1. is non-NULL. > + * 2. represents a valid existing path. > I think we should drop this. We want the port to be added even if the path is not there. When qemu is started it will create the socket. (This is assuming that we change 'vhost-server-path' to include the full path to the socket.) > + * 3. hasn't already been requested ie. has changed since last > call. 
> + */ > + if (path && !stat(path, &s) > + && strcmp(path, dev->requested_vhost_client_path)) { > + strcpy(dev->requested_vhost_client_path, path); > I think this should be limited with ovs_strlcpy(dev->requested_vhost_client_path, path, sizeof dev->requested_vhost_client_path); > + netdev_request_reconfigure(netdev); > + } > + } > + > + return 0; > +} > + > +static int > netdev_dpdk_get_numa_id(const struct netdev *netdev) > { > struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); > @@ -2226,7 +2273,7 @@ netdev_dpdk_remap_txqs(struct netdev_dpdk *dev) > } > } > > - VLOG_DBG("TX queue mapping for %s\n", dev->vhost_id); > + VLOG_DBG("TX queue mapping for %s\n", get_vhost_id(dev)); > for (i = 0; i < total_txqs; i++) { > VLOG_DBG("%2d --> %2d", i, dev->tx_q[i].map); > } > @@ -2250,7 +2297,7 @@ new_device(int vid) > ovs_mutex_lock(&dpdk_mutex); > /* Add device to the vhost port with the same name as that passed > down. */ > LIST_FOR_EACH(dev, list_node, &dpdk_list) { > - if (strncmp(ifname, dev->vhost_id, IF_NAME_SZ) == 0) { > + if (strncmp(ifname, get_vhost_id(dev), IF_NAME_SZ) == 0) { > uint32_t qp_num = rte_vhost_get_queue_num(vid); > > ovs_mutex_lock(&dev->mutex); > @@ -2376,7 +2423,7 @@ vring_state_changed(int vid, uint16_t queue_id, int > enable) > > ovs_mutex_lock(&dpdk_mutex); > LIST_FOR_EACH (dev, list_node, &dpdk_list) { > - if (strncmp(ifname, dev->vhost_id, IF_NAME_SZ) == 0) { > + if (strncmp(ifname, get_vhost_id(dev), IF_NAME_SZ) == 0) { > ovs_mutex_lock(&dev->mutex); > if (enable) { > dev->tx_q[qid].map = qid; > @@ -2895,6 +2942,47 @@ netdev_dpdk_vhost_reconfigure(struct netdev > *netdev) > dev->vhost_reconfigured = true; > } > > + /* Configure vHost client mode if requested and if the following > criteria > + * are met: > + * 1. Device is currently in 'server' mode. > + * 2. Device is currently not active. > + * 3. A valid path has been specified. 
> + */ > + if (!(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT) > + && !(netdev_dpdk_get_vid(dev) >= 0) > + && dev->requested_vhost_client_path > + && strlen(dev->requested_vhost_client_path)) { > + /* Unregister server-mode device */ > + ovs_mutex_unlock(&dpdk_mutex); > + ovs_mutex_unlock(&dev->mutex); > + err = dpdk_vhost_driver_unregister(dev); > + ovs_mutex_lock(&dpdk_mutex); > + ovs_mutex_lock(&dev->mutex); > + if (err) { > + VLOG_ERR("Unable to remove vhost-user socket %s", > + get_vhost_id(dev)); > + } else { > + fatal_signal_remove_file_to_unlink(get_vhost_id(dev)); > + /* Create the new vhost_id using path specified */ > + snprintf(CONST_CAST(char *, dev->vhost_client_id), > + sizeof(dev->vhost_client_id), > + "%s/%s", dev->requested_vhost_client_path, dev-> > up.name); > + /* Register client-mode device */ > + err = rte_vhost_driver_register(dev->vhost_client_id, > + RTE_VHOST_USER_CLIENT); > + if (err) { > + VLOG_ERR("vhost-user device setup failure for device > %s\n", > + dev->vhost_client_id); > + } else { > + /* Configuration successful */ > + dev->vhost_driver_flags |= RTE_VHOST_USER_CLIENT; > + VLOG_INFO("vHost User device '%s' changed to 'client' > mode, " > + "using client socket '%s'", > + dev->up.name, get_vhost_id(dev)); > + } > + } > + } > + > ovs_mutex_unlock(&dev->mutex); > ovs_mutex_unlock(&dpdk_mutex); > > @@ -3382,7 +3470,7 @@ static const struct netdev_class OVS_UNUSED > dpdk_vhost_class = > "dpdkvhostuser", > netdev_dpdk_vhost_construct, > netdev_dpdk_vhost_destruct, > - NULL, > + netdev_dpdk_vhost_set_config, > NULL, > netdev_dpdk_vhost_send, > netdev_dpdk_vhost_get_carrier, > diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml > index 63f0d89..02adf8a 100644 > --- a/vswitchd/vswitch.xml > +++ b/vswitchd/vswitch.xml > @@ -2366,6 +2366,16 @@ > </ul> > </p> > </column> > + > + <column name="options" key="vhost-server-path" > + type='{"type": "string"}'> > + <p> > + When specified, switches the given port permanently to 'client' > 
+ mode. The value specifies the directory in which to find the > sockets > + of vHost User client mode devices created by QEMU. > + Only supported by DPDK vHost interfaces. > + </p> > + </column> > </group> > > <group title="Interface Status"> > -- > 2.4.3 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev > _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev