The idea looks very good to me, thanks for working on it.

Very high level comments:

Do we need to be limited to pci devices?  Perhaps we can accept the same
string as rte_eth_dev_attach().

Would it be possible to integrate this more with the hotplug patch?  It
would be nice to avoid introducing extra appctl commands and call
rte_eth_dev_attach() if needed in netdev_dpdk_construct().

Thoughts?

Thanks,

Daniele

2016-07-15 9:34 GMT-07:00 Ciara Loftus <ciara.lof...@intel.com>:

> 'dpdk' ports no longer have naming restrictions. Now, instead
> of specifying the dpdk port ID as part of the name, the PCI
> address of the device must be specified via the 'dpdk-pci'
> option. eg.
>
> ovs-vsctl add-port br0 my-port
> ovs-vsctl set Interface my-port type=dpdk
> ovs-vsctl set Interface my-port options:dpdk-pci=0000:06:00.3
>
> Signed-off-by: Ciara Loftus <ciara.lof...@intel.com>
>
> v2:
> - remove global pci list
> - remove unnecessary parenthesis
> - remove return from void fn
> - print pci like dpdk
> - fix port ranges
> ---
>  INSTALL.DPDK-ADVANCED.md |   2 +-
>  INSTALL.DPDK.md          |  10 ++--
>  NEWS                     |   2 +
>  lib/netdev-dpdk.c        | 132
> ++++++++++++++++++++++++++++++++++++++---------
>  4 files changed, 116 insertions(+), 30 deletions(-)
>
> diff --git a/INSTALL.DPDK-ADVANCED.md b/INSTALL.DPDK-ADVANCED.md
> index 61b4e82..7370d03 100644
> --- a/INSTALL.DPDK-ADVANCED.md
> +++ b/INSTALL.DPDK-ADVANCED.md
> @@ -854,7 +854,7 @@ At this point, the user can create a ovs port using
> the add-port command.
>  It is also possible to detach a port from ovs, the user has to remove the
>  port using the del-port command, then it can be detached using:
>
> -`ovs-appctl netdev-dpdk/port-detach dpdk0`
> +`ovs-appctl netdev-dpdk/port-detach 0000:01:00.0`
>
>  This feature is not supported with VFIO and could not work with some NICs,
>  please refer to the [DPDK Port Hotplug Framework] in order to get more
> diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md
> index 5407794..9a781ff 100644
> --- a/INSTALL.DPDK.md
> +++ b/INSTALL.DPDK.md
> @@ -258,13 +258,13 @@ advanced install guide [INSTALL.DPDK-ADVANCED.md]
>
>       `ovs-vsctl add-br br0 -- set bridge br0 datapath_type=netdev`
>
> -     Now you can add DPDK devices. OVS expects DPDK device names to start
> with
> -     "dpdk" and end with a portid. vswitchd should print (in the log
> file) the
> -     number of dpdk devices found.
> +     Now you can add dpdk devices. The PCI address of the device needs to
> be
> +     set using the 'dpdk-pci' option. vswitchd should print (in the log
> file)
> +     the PCI addresses of dpdk devices found during initialisation.
>
>       ```
> -     ovs-vsctl add-port br0 dpdk0 -- set Interface dpdk0 type=dpdk
> -     ovs-vsctl add-port br0 dpdk1 -- set Interface dpdk1 type=dpdk
> +     ovs-vsctl add-port br0 dpdk0 -- set Interface dpdk0 type=dpdk
> options:dpdk-pci=0000:06:00.0
> +     ovs-vsctl add-port br0 dpdk1 -- set Interface dpdk1 type=dpdk
> options:dpdk-pci=0000:06:00.1
>       ```
>
>       After the DPDK ports get added to switch, a polling thread
> continuously polls
> diff --git a/NEWS b/NEWS
> index 9064225..03b9ba8 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -59,6 +59,8 @@ Post-v2.5.0
>         node that device memory is located on if
> CONFIG_RTE_LIBRTE_VHOST_NUMA
>         is enabled in DPDK.
>       * Port Hotplug is now supported.
> +     * DPDK physical ports can now have arbitrary names. The PCI address
> of
> +       the device must be set using the 'dpdk-pci' option.
>     - Increase number of registers to 16.
>     - ovs-benchmark: This utility has been removed due to lack of use and
>       bitrot.
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 3fab52c..d2cceb2 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -58,6 +58,7 @@
>  #include "rte_config.h"
>  #include "rte_mbuf.h"
>  #include "rte_meter.h"
> +#include "rte_pci.h"
>  #include "rte_virtio_net.h"
>
>  VLOG_DEFINE_THIS_MODULE(dpdk);
> @@ -736,7 +737,7 @@ netdev_dpdk_init(struct netdev *netdev, unsigned int
> port_no,
>      /* If the 'sid' is negative, it means that the kernel fails
>       * to obtain the pci numa info.  In that situation, always
>       * use 'SOCKET0'. */
> -    if (type == DPDK_DEV_ETH) {
> +    if (type == DPDK_DEV_ETH && dev->port_id != -1) {
>          sid = rte_eth_dev_socket_id(port_no);
>      } else {
>          sid = rte_lcore_to_socket_id(rte_get_master_lcore());
> @@ -772,9 +773,11 @@ netdev_dpdk_init(struct netdev *netdev, unsigned int
> port_no,
>      dev->requested_n_txq = netdev->n_txq;
>
>      if (type == DPDK_DEV_ETH) {
> -        err = dpdk_eth_dev_init(dev);
> -        if (err) {
> -            goto unlock;
> +        if (dev->port_id != -1) {
> +            err = dpdk_eth_dev_init(dev);
> +            if (err) {
> +                goto unlock;
> +            }
>          }
>          netdev_dpdk_alloc_txq(dev, netdev->n_txq);
>          dev->txq_needs_locking = netdev->n_txq < dev->requested_n_txq;
> @@ -886,21 +889,14 @@ netdev_dpdk_vhost_user_construct(struct netdev
> *netdev)
>  static int
>  netdev_dpdk_construct(struct netdev *netdev)
>  {
> -    unsigned int port_no;
>      int err;
>
>      if (rte_eal_init_ret) {
>          return rte_eal_init_ret;
>      }
>
> -    /* Names always start with "dpdk" */
> -    err = dpdk_dev_parse_name(netdev->name, "dpdk", &port_no);
> -    if (err) {
> -        return err;
> -    }
> -
>      ovs_mutex_lock(&dpdk_mutex);
> -    err = netdev_dpdk_init(netdev, port_no, DPDK_DEV_ETH);
> +    err = netdev_dpdk_init(netdev, -1, DPDK_DEV_ETH);
>      ovs_mutex_unlock(&dpdk_mutex);
>      return err;
>  }
> @@ -979,11 +975,39 @@ netdev_dpdk_get_config(const struct netdev *netdev,
> struct smap *args)
>      return 0;
>  }
>
> +static void
> +netdev_dpdk_associate_pci(struct netdev_dpdk *dev, struct rte_pci_addr
> *addr)
> +{
> +    int i = 0;
> +    struct rte_eth_dev_info info;
> +
> +    for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
> +        if (!rte_eth_dev_is_valid_port(i)) {
> +            continue;
> +        }
> +        rte_eth_dev_info_get(i, &info);
> +        if (!rte_eal_compare_pci_addr(&info.pci_dev->addr, addr)) {
> +            dev->port_id = i;
> +            break;
> +        }
> +    }
> +
> +    if (dev->port_id != -1) {
> +        rte_eth_dev_stop(dev->port_id);
> +        dev->socket_id = rte_eth_dev_socket_id(dev->port_id);
> +        dpdk_eth_dev_init(dev);
> +    } else {
> +        VLOG_INFO("Invalid PCI address for device %s", dev->up.name);
> +    }
> +}
> +
>  static int
>  netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args)
>  {
>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>      int new_n_rxq;
> +    struct rte_pci_addr addr;
> +    const char *pci_str;
>
>      ovs_mutex_lock(&dev->mutex);
>      new_n_rxq = MAX(smap_get_int(args, "n_rxq", dev->requested_n_rxq), 1);
> @@ -991,6 +1015,19 @@ netdev_dpdk_set_config(struct netdev *netdev, const
> struct smap *args)
>          dev->requested_n_rxq = new_n_rxq;
>          netdev_request_reconfigure(netdev);
>      }
> +
> +    if (dev->port_id == -1) {
> +        pci_str = smap_get(args, "dpdk-pci");
> +        if (pci_str != NULL) {
> +            if (!eal_parse_pci_DomBDF(pci_str, &addr)) {
> +                netdev_dpdk_associate_pci(dev, &addr);
> +            } else {
> +                VLOG_ERR("Error parsing PCI address %s, please check
> format",
> +                          pci_str);
> +            }
> +        }
> +    }
> +
>      ovs_mutex_unlock(&dev->mutex);
>
>      return 0;
> @@ -2179,6 +2216,7 @@ netdev_dpdk_port_attach(struct unixctl_conn *conn,
> int argc OVS_UNUSED,
>      int ret;
>      char response[128];
>      uint8_t port_id;
> +    struct rte_eth_dev_info info;
>
>      ovs_mutex_lock(&dpdk_mutex);
>
> @@ -2192,7 +2230,12 @@ netdev_dpdk_port_attach(struct unixctl_conn *conn,
> int argc OVS_UNUSED,
>      }
>
>      snprintf(response, sizeof(response),
> -             "Device '%s' has been attached as 'dpdk%d'", argv[1],
> port_id);
> +             "Device '%s' has been attached", argv[1]);
> +
> +    rte_eth_dev_info_get(port_id, &info);
> +    VLOG_INFO("DPDK PCI device "PCI_PRI_FMT" available",
> +              info.pci_dev->addr.domain, info.pci_dev->addr.bus,
> +              info.pci_dev->addr.devid, info.pci_dev->addr.function);
>
>      ovs_mutex_unlock(&dpdk_mutex);
>      unixctl_command_reply(conn, response);
> @@ -2204,41 +2247,71 @@ netdev_dpdk_port_detach(struct unixctl_conn *conn,
> int argc OVS_UNUSED,
>  {
>      int ret;
>      char response[128];
> -    unsigned int parsed_port;
>      uint8_t port_id;
>      char devname[RTE_ETH_NAME_MAX_LEN];
> +    bool found = false;
> +    int i;
> +    struct rte_pci_addr addr;
> +    struct netdev_dpdk *dev;
> +    struct rte_eth_dev_info info;
>
>      ovs_mutex_lock(&dpdk_mutex);
>
> -    ret = dpdk_dev_parse_name(argv[1], "dpdk", &parsed_port);
> -    if (ret) {
> +    /* Parse the PCI address to a usable format */
> +    if (eal_parse_pci_DomBDF(argv[1], &addr)) {
>          snprintf(response, sizeof(response),
> -                 "'%s' is not a valid port", argv[1]);
> +                 "'%s' is not a valid PCI address format - cannot detach",
> +                 argv[1]);
>          goto error;
>      }
>
> -    port_id = parsed_port;
> +    /* Search for the address in the initialised devices */
> +    for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
> +        if (!rte_eth_dev_is_valid_port(i)) {
> +            continue;
> +        }
> +        rte_eth_dev_info_get(i, &info);
> +        if (!rte_eal_compare_pci_addr(&info.pci_dev->addr, &addr)) {
> +            port_id = i;
> +            found = true;
> +            break;
> +        }
> +    }
>
> -    struct netdev *netdev = netdev_from_name(argv[1]);
> -    if (netdev) {
> -        netdev_close(netdev);
> +    /* Print an error if the device is not already initialised */
> +    if (!found) {
>          snprintf(response, sizeof(response),
> -                 "Port '%s' is being used. Remove it before detaching",
> +                 "'%s' is not an initialized DPDK device - cannot detach",
>                   argv[1]);
>          goto error;
>      }
>
> +    /* Check if the device is already in use by a port, and advise the
> user to
> +     * remove it if so */
> +    LIST_FOR_EACH(dev, list_node, &dpdk_list) {
> +        if (dev->port_id == port_id) {
> +            snprintf(response, sizeof(response),
> +                     "Port '%s' is using PCI device '%s'."
> +                     "Remove it before detaching",
> +                     dev->up.name, argv[1]);
> +            goto error;
> +        }
> +    }
> +
> +    /* It is safe to detach the device */
>      rte_eth_dev_close(port_id);
>
>      ret = rte_eth_dev_detach(port_id, devname);
>      if (ret < 0) {
>          snprintf(response, sizeof(response),
> -                 "Port '%s' can not be detached", argv[1]);
> +                 "Device '%s' can not be detached", argv[1]);
>          goto error;
>      }
>
>      snprintf(response, sizeof(response),
> -             "Port '%s' has been detached", argv[1]);
> +             "Device '%s' has been detached", argv[1]);
> +    VLOG_INFO("DPDK PCI device "PCI_PRI_FMT" no longer available",
> +              addr.domain, addr.bus, addr.devid, addr.function);
>
>      ovs_mutex_unlock(&dpdk_mutex);
>      unixctl_command_reply(conn, response);
> @@ -3295,6 +3368,9 @@ dpdk_init__(const struct smap *ovs_other_config)
>      int argc, argc_tmp;
>      bool auto_determine = true;
>      int err = 0;
> +    int i = 0;
> +    uint8_t nb_ports = 0;
> +    struct rte_eth_dev_info info;
>      cpu_set_t cpuset;
>  #ifndef VHOST_CUSE
>      char *sock_dir_subcomponent;
> @@ -3417,6 +3493,14 @@ dpdk_init__(const struct smap *ovs_other_config)
>
>      atexit(deferred_argv_release);
>
> +    nb_ports = rte_eth_dev_count();
> +    for (i = 0; i < nb_ports; i++) {
> +        rte_eth_dev_info_get(i, &info);
> +        VLOG_INFO("DPDK PCI device "PCI_PRI_FMT" available",
> +                  info.pci_dev->addr.domain, info.pci_dev->addr.bus,
> +                  info.pci_dev->addr.devid, info.pci_dev->addr.function);
> +    }
> +
>      rte_memzone_dump(stdout);
>      rte_eal_init_ret = 0;
>
> --
> 2.4.3
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
>
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to