'dpdk' ports no longer have naming restrictions. Now, instead of specifying the dpdk port ID as part of the name, the PCI address of the device must be specified via the 'dpdk-pci' option. eg.
ovs-vsctl add-port br0 my-port ovs-vsctl set Interface my-port type=dpdk ovs-vsctl set Interface my-port options:dpdk-pci=0000:06:00.3 Signed-off-by: Ciara Loftus <ciara.lof...@intel.com> --- INSTALL.DPDK.md | 12 ++--- NEWS | 2 + lib/netdev-dpdk.c | 142 +++++++++++++++++++++++++++++++++++++++++++++--------- 3 files changed, 127 insertions(+), 29 deletions(-) diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md index 28c5b90..ad2fcbf 100644 --- a/INSTALL.DPDK.md +++ b/INSTALL.DPDK.md @@ -208,13 +208,13 @@ Using the DPDK with ovs-vswitchd: `ovs-vsctl add-br br0 -- set bridge br0 datapath_type=netdev` - Now you can add dpdk devices. OVS expects DPDK device names to start with - "dpdk" and end with a portid. vswitchd should print (in the log file) the - number of dpdk devices found. + Now you can add dpdk devices. The PCI address of the device needs to be + set using the 'dpdk-pci' option. vswitchd should print (in the log file) + the number and PCI addresses of dpdk devices found during initialisation. ``` - ovs-vsctl add-port br0 dpdk0 -- set Interface dpdk0 type=dpdk - ovs-vsctl add-port br0 dpdk1 -- set Interface dpdk1 type=dpdk + ovs-vsctl add-port br0 dpdk0 -- set Interface dpdk0 type=dpdk options:dpdk-pci=0000:06:00.0 + ovs-vsctl add-port br0 dpdk1 -- set Interface dpdk1 type=dpdk options:dpdk-pci=0000:06:00.1 ``` Once first DPDK port is added to vswitchd, it creates a Polling thread and @@ -304,7 +304,7 @@ Using the DPDK with ovs-vswitchd: It is also possible to detach a port from ovs, the user has to remove the port using the del-port command, then it can be detached using: - `ovs-appctl netdev-dpdk/port-detach dpdk0` + `ovs-appctl netdev-dpdk/port-detach 0000:01:00.0` This feature is not supported with VFIO and could not work with some NICs, please refer to the [DPDK Port Hotplug Framework] in order to get more diff --git a/NEWS b/NEWS index a1146b0..db702b7 100644 --- a/NEWS +++ b/NEWS @@ -49,6 +49,8 @@ Post-v2.5.0 node that device memory is located on if CONFIG_RTE_LIBRTE_VHOST_NUMA is enabled in DPDK. * Port Hotplug is now supported. + * DPDK physical ports can now have arbitrary names. The PCI address of + the device must be set using the 'dpdk-pci' option. - ovs-benchmark: This utility has been removed due to lack of use and bitrot. - ovs-appctl: diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 857339a..8e69f3a 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -144,6 +144,10 @@ static char *vhost_sock_dir = NULL; /* Location of vhost-user sockets */ #define VHOST_ENQ_RETRY_NUM 8 +static uint8_t nb_ports; /* Number of DPDK ports initialised */ +struct rte_pci_addr pci_devs[RTE_MAX_ETHPORTS]; /* PCI info of initialised DPDK + devices */ + static const struct rte_eth_conf port_conf = { .rxmode = { .mq_mode = ETH_MQ_RX_RSS, @@ -757,7 +761,7 @@ netdev_dpdk_init(struct netdev *netdev, unsigned int port_no, /* If the 'sid' is negative, it means that the kernel fails * to obtain the pci numa info. In that situation, always * use 'SOCKET0'. */ - if (type == DPDK_DEV_ETH) { + if (type == DPDK_DEV_ETH && (dev->port_id != -1)) { sid = rte_eth_dev_socket_id(port_no); } else { sid = rte_lcore_to_socket_id(rte_get_master_lcore()); @@ -795,9 +799,11 @@ netdev_dpdk_init(struct netdev *netdev, unsigned int port_no, if (type == DPDK_DEV_ETH) { netdev_dpdk_alloc_txq(dev, NR_QUEUE); - err = dpdk_eth_dev_init(dev); - if (err) { - goto unlock; + if (dev->port_id != -1) { + err = dpdk_eth_dev_init(dev); + if (err) { + goto unlock; + } } } else { netdev_dpdk_alloc_txq(dev, OVS_VHOST_MAX_QUEUE_NUM); @@ -909,21 +915,14 @@ netdev_dpdk_vhost_user_construct(struct netdev *netdev) static int netdev_dpdk_construct(struct netdev *netdev) { - unsigned int port_no; int err; if (rte_eal_init_ret) { return rte_eal_init_ret; } - /* Names always start with "dpdk" */ - err = dpdk_dev_parse_name(netdev->name, "dpdk", &port_no); - if (err) { - return err; - } - ovs_mutex_lock(&dpdk_mutex); - err = netdev_dpdk_init(netdev, port_no, DPDK_DEV_ETH); + err = netdev_dpdk_init(netdev, -1, DPDK_DEV_ETH); ovs_mutex_unlock(&dpdk_mutex); return err; } @@ -1002,11 +1001,36 @@ netdev_dpdk_get_config(const struct netdev *netdev, struct smap *args) return 0; } +static void +netdev_dpdk_associate_pci(struct netdev_dpdk *dev, struct rte_pci_addr *addr) +{ + int i = 0; + + for (i = 0; i < nb_ports; i++) { + if (!rte_eal_compare_pci_addr(&pci_devs[i], addr)) { + dev->port_id = i; + break; + } + } + + if (dev->port_id != -1) { + rte_eth_dev_stop(dev->port_id); + dev->socket_id = rte_eth_dev_socket_id(dev->port_id); + dpdk_eth_dev_init(dev); + } else { + VLOG_INFO("Invalid PCI address for device %s", dev->up.name); + } + + return; +} + static int netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args) { struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); int new_n_rxq; + struct rte_pci_addr addr; + const char *pci_str; ovs_mutex_lock(&dev->mutex); new_n_rxq = MAX(smap_get_int(args, "n_rxq", dev->requested_n_rxq), 1); @@ -1014,6 +1038,19 @@ netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args) dev->requested_n_rxq = new_n_rxq; netdev_request_reconfigure(netdev); } + + if (dev->port_id == -1) { + pci_str = smap_get(args, "dpdk-pci"); + if (pci_str != NULL) { + if (!eal_parse_pci_DomBDF(pci_str, &addr)) { + netdev_dpdk_associate_pci(dev, &addr); + } else { + VLOG_ERR("Error parsing PCI address %s, please check format", + pci_str); + } + } + } + ovs_mutex_unlock(&dev->mutex); return 0; @@ -2259,6 +2296,7 @@ netdev_dpdk_port_attach(struct unixctl_conn *conn, int argc OVS_UNUSED, int ret; char response[128]; uint8_t port_id; + struct rte_eth_dev_info info; ovs_mutex_lock(&dpdk_mutex); @@ -2274,6 +2312,15 @@ netdev_dpdk_port_attach(struct unixctl_conn *conn, int argc OVS_UNUSED, snprintf(response, sizeof(response), "Device '%s' has been attached as 'dpdk%d'", argv[1], port_id); + /* Add the new PCI address to list of devices available */ + rte_eth_dev_info_get(port_id, &info); + pci_devs[port_id] = info.pci_dev->addr; + VLOG_INFO("DPDK PCI device %i:%i:%i.%i available", + pci_devs[port_id].domain, pci_devs[port_id].bus, + pci_devs[port_id].devid, pci_devs[port_id].function); + /* Update the number of ports */ + nb_ports = rte_eth_dev_count(); + ovs_mutex_unlock(&dpdk_mutex); unixctl_command_reply(conn, response); } @@ -2284,41 +2331,79 @@ netdev_dpdk_port_detach(struct unixctl_conn *conn, int argc OVS_UNUSED, { int ret; char response[128]; - unsigned int parsed_port; uint8_t port_id; char devname[RTE_ETH_NAME_MAX_LEN]; + bool found = false; + bool in_use = false; + int i; + struct rte_pci_addr addr; + struct netdev_dpdk *dev; ovs_mutex_lock(&dpdk_mutex); - ret = dpdk_dev_parse_name(argv[1], "dpdk", &parsed_port); - if (ret) { + /* Parse the PCI address to a usable format */ + if (eal_parse_pci_DomBDF(argv[1], &addr)) { snprintf(response, sizeof(response), - "'%s' is not a valid port", argv[1]); + "'%s' is not a valid PCI address format - cannot detach", + argv[1]); goto error; } - port_id = parsed_port; + /* Search for the address in the initialised devices */ + for (i = 0; i < nb_ports; i++) { + if (!rte_eal_compare_pci_addr(&pci_devs[i], &addr)) { + port_id = i; + found = true; + break; + } + } - struct netdev *netdev = netdev_from_name(argv[1]); - if (netdev) { - netdev_close(netdev); + /* Print an error if the device is not already initialised */ + if (!found) { snprintf(response, sizeof(response), - "Port '%s' is being used. Remove it before detaching", + "'%s' is not an initialized DPDK device - cannot detach", argv[1]); goto error; } + /* Check if the device is already in use by a port */ + LIST_FOR_EACH(dev, list_node, &dpdk_list) { + if (dev->port_id == port_id) { + in_use = true; + break; + } + } + + /* If the device is in use, advise the user to remove it */ + if (in_use) { + snprintf(response, sizeof(response), + "Port '%s' is using PCI device '%s'." + "Remove it before detaching", + dev->up.name, argv[1]); + goto error; + } + + /* It is safe to detach the device */ rte_eth_dev_close(port_id); ret = rte_eth_dev_detach(port_id, devname); if (ret < 0) { snprintf(response, sizeof(response), - "Port '%s' can not be detached", argv[1]); + "Device '%s' can not be detached", argv[1]); goto error; } snprintf(response, sizeof(response), - "Port '%s' has been detached", argv[1]); + "Device '%s' has been detached", argv[1]); + VLOG_INFO("DPDK PCI device %i:%i:%i.%i no longer available", + addr.domain, addr.bus, + addr.devid, addr.function); + + /* Remove PCI address from list of available devices */ + pci_devs[port_id].domain = 0; + pci_devs[port_id].bus = 0; + pci_devs[port_id].devid = 0; + pci_devs[port_id].function = 0; ovs_mutex_unlock(&dpdk_mutex); unixctl_command_reply(conn, response); @@ -3387,6 +3472,8 @@ dpdk_init__(const struct smap *ovs_other_config) int argc, argc_tmp; bool auto_determine = true; int err = 0; + int i = 0; + struct rte_eth_dev_info info; cpu_set_t cpuset; #ifndef VHOST_CUSE char *sock_dir_subcomponent; @@ -3509,6 +3596,15 @@ dpdk_init__(const struct smap *ovs_other_config) atexit(deferred_argv_release); + memset(pci_devs, 0x0, sizeof(pci_devs)); + nb_ports = rte_eth_dev_count(); + for (i = 0; i < nb_ports; i++) { + rte_eth_dev_info_get(i, &info); + pci_devs[i] = info.pci_dev->addr; + VLOG_INFO("DPDK PCI device %i:%i:%i.%i available", pci_devs[i].domain, + pci_devs[i].bus, pci_devs[i].devid, pci_devs[i].function); + } + rte_memzone_dump(stdout); rte_eal_init_ret = 0; -- 2.4.3 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev