This introduces in dpif-netdev and netdev-dpdk the first use of the newly introduced reconfigure netdev call.
When a request to change the number of queues comes, netdev-dpdk will remember this and notify the upper layer via netdev_request_reconfigure(). The datapath, instead of periodically calling netdev_set_multiq(), can detect this and call reconfigure(). This mechanism can also be used to: * Automatically match the number of rxq with the one provided by qemu via the new_device callback. * Provide a way to change the MTU of dpdk devices at runtime. Signed-off-by: Daniele Di Proietto <diproiet...@vmware.com> --- lib/dpif-netdev.c | 71 +++++++++--------- lib/netdev-dpdk.c | 195 ++++++++++++++++++++++++++------------------------ lib/netdev-provider.h | 23 +++--- lib/netdev.c | 34 +++------ lib/netdev.h | 3 +- 5 files changed, 156 insertions(+), 170 deletions(-) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 9fbf821..7a775d8 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -256,8 +256,6 @@ struct dp_netdev_port { unsigned n_rxq; /* Number of elements in 'rxq' */ struct netdev_rxq **rxq; char *type; /* Port type as requested by user. */ - int latest_requested_n_rxq; /* Latest requested from netdev number - of rx queues. */ }; /* Contained by struct dp_netdev_flow's 'stats' member. */ @@ -1161,20 +1159,26 @@ do_add_port(struct dp_netdev *dp, const char *devname, const char *type, /* There can only be ovs_numa_get_n_cores() pmd threads, * so creates a txq for each, and one extra for the non * pmd threads. 
*/ - error = netdev_set_multiq(netdev, n_cores + 1, - netdev_requested_n_rxq(netdev)); + error = netdev_set_multiq(netdev, n_cores + 1); if (error && (error != EOPNOTSUPP)) { VLOG_ERR("%s, cannot set multiq", devname); goto out_close; } } + + if (netdev_is_reconf_required(netdev)) { + error = netdev_reconfigure(netdev); + if (error) { + goto out_close; + } + } + port = xzalloc(sizeof *port); port->port_no = port_no; port->netdev = netdev; port->n_rxq = netdev_n_rxq(netdev); port->rxq = xmalloc(sizeof *port->rxq * port->n_rxq); port->type = xstrdup(type); - port->latest_requested_n_rxq = netdev_requested_n_rxq(netdev); for (i = 0; i < port->n_rxq; i++) { error = netdev_rxq_open(netdev, &port->rxq[i], i); @@ -2450,24 +2454,6 @@ dpif_netdev_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops) } } -/* Returns true if the configuration for rx queues is changed. */ -static bool -pmd_n_rxq_changed(const struct dp_netdev *dp) -{ - struct dp_netdev_port *port; - - CMAP_FOR_EACH (port, node, &dp->ports) { - int requested_n_rxq = netdev_requested_n_rxq(port->netdev); - - if (netdev_is_pmd(port->netdev) - && port->latest_requested_n_rxq != requested_n_rxq) { - return true; - } - } - - return false; -} - static bool cmask_equals(const char *a, const char *b) { @@ -2600,14 +2586,12 @@ reconfigure_pmd_threads(struct dp_netdev *dp) dp_netdev_destroy_all_pmds(dp); CMAP_FOR_EACH (port, node, &dp->ports) { - struct netdev *netdev = port->netdev; - int requested_n_rxq = netdev_requested_n_rxq(netdev); - if (netdev_is_pmd(port->netdev) - && port->latest_requested_n_rxq != requested_n_rxq) { + if (netdev_is_reconf_required(port->netdev)) { cmap_remove(&dp->ports, &port->node, hash_odp_port(port->port_no)); hmapx_add(&to_reconfigure, port); } } + ovs_mutex_unlock(&dp->port_mutex); /* Waits for the other threads to see the ports removed from the cmap, @@ -2616,10 +2600,9 @@ reconfigure_pmd_threads(struct dp_netdev *dp) ovs_mutex_lock(&dp->port_mutex); HMAPX_FOR_EACH (node, 
&to_reconfigure) { - int requested_n_rxq, i, err; + int i, err; port = node->data; - requested_n_rxq = netdev_requested_n_rxq(port->netdev); /* Closes the existing 'rxq's. */ for (i = 0; i < port->n_rxq; i++) { netdev_rxq_close(port->rxq[i]); @@ -2627,18 +2610,15 @@ reconfigure_pmd_threads(struct dp_netdev *dp) } port->n_rxq = 0; - /* Sets the new rx queue config. */ - err = netdev_set_multiq(port->netdev, ovs_numa_get_n_cores() + 1, - requested_n_rxq); + /* Allows the netdev to apply the pending configuration changes. */ + err = netdev_reconfigure(port->netdev); if (err && (err != EOPNOTSUPP)) { - VLOG_ERR("Failed to set dpdk interface %s rx_queue to: %u", - netdev_get_name(port->netdev), - requested_n_rxq); + VLOG_ERR("Failed to set interface %s new configuration", + netdev_get_name(port->netdev)); do_destroy_port(port); failed_config = true; continue; } - port->latest_requested_n_rxq = requested_n_rxq; /* If the netdev_reconfigure() above succeeds, reopens the 'rxq's and * inserts the port back in the cmap, to allow transmitting packets. */ port->n_rxq = netdev_n_rxq(port->netdev); @@ -2669,6 +2649,21 @@ reconfigure_pmd_threads(struct dp_netdev *dp) dp_netdev_reset_pmd_threads(dp); } +/* Returns true if one of the netdevs in 'dp' requires a reconfiguration */ +static bool +ports_require_restart(const struct dp_netdev *dp) +{ + struct dp_netdev_port *port; + + CMAP_FOR_EACH (port, node, &dp->ports) { + if (netdev_is_reconf_required(port->netdev)) { + return true; + } + } + + return false; +} + /* Return true if needs to revalidate datapath flows. 
*/ static bool dpif_netdev_run(struct dpif *dpif) @@ -2694,7 +2689,7 @@ dpif_netdev_run(struct dpif *dpif) ovs_mutex_unlock(&dp->non_pmd_mutex); if (!cmask_equals(dp->pmd_cmask, dp->requested_pmd_cmask) - || pmd_n_rxq_changed(dp)) { + || ports_require_restart(dp)) { reconfigure_pmd_threads(dp); } @@ -2717,6 +2712,8 @@ dpif_netdev_wait(struct dpif *dpif) ovs_mutex_lock(&dp_netdev_mutex); CMAP_FOR_EACH (port, node, &dp->ports) { + netdev_wait_reconf_required(port->netdev); + if (!netdev_is_pmd(port->netdev)) { int i; diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 585b05f..05a3624 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -339,6 +339,11 @@ struct netdev_dpdk { struct qos_conf *qos_conf; rte_spinlock_t qos_lock; + /* The following properties cannot be changed when a device is running, + * so we remember the request and update them next time + * netdev_dpdk*_reconfigure() is called */ + int requested_n_txq; + int requested_n_rxq; }; struct netdev_rxq_dpdk { @@ -720,7 +725,8 @@ netdev_dpdk_init(struct netdev *netdev, unsigned int port_no, netdev->n_txq = NR_QUEUE; netdev->n_rxq = NR_QUEUE; - netdev->requested_n_rxq = NR_QUEUE; + dev->requested_n_rxq = NR_QUEUE; + dev->requested_n_txq = NR_QUEUE; dev->real_n_txq = NR_QUEUE; if (type == DPDK_DEV_ETH) { @@ -903,7 +909,7 @@ netdev_dpdk_get_config(const struct netdev *netdev, struct smap *args) ovs_mutex_lock(&dev->mutex); - smap_add_format(args, "requested_rx_queues", "%d", netdev->requested_n_rxq); + smap_add_format(args, "requested_rx_queues", "%d", dev->requested_n_rxq); smap_add_format(args, "configured_rx_queues", "%d", netdev->n_rxq); smap_add_format(args, "requested_tx_queues", "%d", netdev->n_txq); smap_add_format(args, "configured_tx_queues", "%d", dev->real_n_txq); @@ -916,11 +922,14 @@ static int netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args) { struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); + int new_n_rxq; ovs_mutex_lock(&dev->mutex); - 
netdev->requested_n_rxq = MAX(smap_get_int(args, "n_rxq", - netdev->requested_n_rxq), 1); - netdev_change_seq_changed(netdev); + new_n_rxq = MAX(smap_get_int(args, "n_rxq", dev->requested_n_rxq), 1); + if (new_n_rxq != dev->requested_n_rxq) { + dev->requested_n_rxq = new_n_rxq; + netdev_request_reconfigure(netdev); + } ovs_mutex_unlock(&dev->mutex); return 0; @@ -934,95 +943,24 @@ netdev_dpdk_get_numa_id(const struct netdev *netdev) return dev->socket_id; } -/* Sets the number of tx queues and rx queues for the dpdk interface. - * If the configuration fails, do not try restoring its old configuration - * and just returns the error. */ -static int -netdev_dpdk_set_multiq(struct netdev *netdev, unsigned int n_txq, - unsigned int n_rxq) -{ - struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); - int err = 0; - int old_rxq, old_txq; - - if (netdev->n_txq == n_txq && netdev->n_rxq == n_rxq) { - return err; - } - - ovs_mutex_lock(&dpdk_mutex); - ovs_mutex_lock(&dev->mutex); - - rte_eth_dev_stop(dev->port_id); - - old_txq = netdev->n_txq; - old_rxq = netdev->n_rxq; - netdev->n_txq = n_txq; - netdev->n_rxq = n_rxq; - - rte_free(dev->tx_q); - err = dpdk_eth_dev_init(dev); - netdev_dpdk_alloc_txq(dev, dev->real_n_txq); - if (err) { - /* If there has been an error, it means that the requested queues - * have not been created. Restore the old numbers. */ - netdev->n_txq = old_txq; - netdev->n_rxq = old_rxq; - } - - dev->txq_needs_locking = dev->real_n_txq != netdev->n_txq; - - ovs_mutex_unlock(&dev->mutex); - ovs_mutex_unlock(&dpdk_mutex); - - return err; -} - +/* Sets the number of tx queues for the dpdk interface. 
*/ static int -netdev_dpdk_vhost_cuse_set_multiq(struct netdev *netdev, unsigned int n_txq, - unsigned int n_rxq) +netdev_dpdk_set_multiq(struct netdev *netdev, unsigned int n_txq) { struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); - int err = 0; - - if (netdev->n_txq == n_txq && netdev->n_rxq == n_rxq) { - return err; - } - ovs_mutex_lock(&dpdk_mutex); ovs_mutex_lock(&dev->mutex); - netdev->n_txq = n_txq; - dev->real_n_txq = 1; - netdev->n_rxq = 1; - dev->txq_needs_locking = dev->real_n_txq != netdev->n_txq; - - ovs_mutex_unlock(&dev->mutex); - ovs_mutex_unlock(&dpdk_mutex); - - return err; -} - -static int -netdev_dpdk_vhost_set_multiq(struct netdev *netdev, unsigned int n_txq, - unsigned int n_rxq) -{ - struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); - int err = 0; - - if (netdev->n_txq == n_txq && netdev->n_rxq == n_rxq) { - return err; + if (dev->requested_n_txq == n_txq) { + goto out; } - ovs_mutex_lock(&dpdk_mutex); - ovs_mutex_lock(&dev->mutex); - - netdev->n_txq = n_txq; - netdev->n_rxq = n_rxq; + dev->requested_n_txq = n_txq; + netdev_request_reconfigure(netdev); +out: ovs_mutex_unlock(&dev->mutex); - ovs_mutex_unlock(&dpdk_mutex); - - return err; + return 0; } static struct netdev_rxq * @@ -2568,6 +2506,7 @@ egress_policer_qos_get(const struct netdev *netdev, struct smap *details) 1ULL * policer->app_srtcm_params.cir); smap_add_format(details, "cbs", "%llu", 1ULL * policer->app_srtcm_params.cbs); + return 0; } @@ -2637,8 +2576,80 @@ static const struct dpdk_qos_ops egress_policer_ops = { egress_policer_run }; -#define NETDEV_DPDK_CLASS(NAME, INIT, CONSTRUCT, DESTRUCT, MULTIQ, SEND, \ - GET_CARRIER, GET_STATS, GET_FEATURES, GET_STATUS, RXQ_RECV) \ +static int +netdev_dpdk_reconfigure(struct netdev *netdev) +{ + struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); + int err = 0; + + ovs_mutex_lock(&dpdk_mutex); + ovs_mutex_lock(&dev->mutex); + + if (netdev->n_txq == dev->requested_n_txq + && netdev->n_rxq == dev->requested_n_rxq) { + /* 
Reconfiguration is unnecessary */ + + goto out; + } + + rte_eth_dev_stop(dev->port_id); + + netdev->n_txq = dev->requested_n_txq; + netdev->n_rxq = dev->requested_n_rxq; + + rte_free(dev->tx_q); + err = dpdk_eth_dev_init(dev); + netdev_dpdk_alloc_txq(dev, dev->real_n_txq); + + dev->txq_needs_locking = dev->real_n_txq != netdev->n_txq; + +out: + + ovs_mutex_unlock(&dev->mutex); + ovs_mutex_unlock(&dpdk_mutex); + + return err; +} + +static int +netdev_dpdk_vhost_user_reconfigure(struct netdev *netdev) +{ + struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); + + ovs_mutex_lock(&dpdk_mutex); + ovs_mutex_lock(&dev->mutex); + + netdev->n_txq = dev->requested_n_txq; + netdev->n_rxq = dev->requested_n_rxq; + + ovs_mutex_unlock(&dev->mutex); + ovs_mutex_unlock(&dpdk_mutex); + + return 0; +} + +static int +netdev_dpdk_vhost_cuse_reconfigure(struct netdev *netdev) +{ + struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); + + ovs_mutex_lock(&dpdk_mutex); + ovs_mutex_lock(&dev->mutex); + + netdev->n_txq = dev->requested_n_txq; + dev->real_n_txq = 1; + netdev->n_rxq = 1; + dev->txq_needs_locking = dev->real_n_txq != dev->up.n_txq; + + ovs_mutex_unlock(&dev->mutex); + ovs_mutex_unlock(&dpdk_mutex); + + return 0; +} + +#define NETDEV_DPDK_CLASS(NAME, INIT, CONSTRUCT, DESTRUCT, SEND, \ + GET_CARRIER, GET_STATS, GET_FEATURES, \ + GET_STATUS, RECONFIGURE, RXQ_RECV) \ { \ NAME, \ INIT, /* init */ \ @@ -2656,7 +2667,7 @@ static const struct dpdk_qos_ops egress_policer_ops = { NULL, /* push header */ \ NULL, /* pop header */ \ netdev_dpdk_get_numa_id, /* get_numa_id */ \ - MULTIQ, /* set_multiq */ \ + netdev_dpdk_set_multiq, \ \ SEND, /* send */ \ NULL, /* send_wait */ \ @@ -2696,7 +2707,7 @@ static const struct dpdk_qos_ops egress_policer_ops = { NULL, /* arp_lookup */ \ \ netdev_dpdk_update_flags, \ - NULL, /* reconfigure */ \ + RECONFIGURE, \ \ netdev_dpdk_rxq_alloc, \ netdev_dpdk_rxq_construct, \ @@ -2811,12 +2822,12 @@ static const struct netdev_class dpdk_class = NULL, 
netdev_dpdk_construct, netdev_dpdk_destruct, - netdev_dpdk_set_multiq, netdev_dpdk_eth_send, netdev_dpdk_get_carrier, netdev_dpdk_get_stats, netdev_dpdk_get_features, netdev_dpdk_get_status, + netdev_dpdk_reconfigure, netdev_dpdk_rxq_recv); static const struct netdev_class dpdk_ring_class = @@ -2825,12 +2836,12 @@ static const struct netdev_class dpdk_ring_class = NULL, netdev_dpdk_ring_construct, netdev_dpdk_destruct, - netdev_dpdk_set_multiq, netdev_dpdk_ring_send, netdev_dpdk_get_carrier, netdev_dpdk_get_stats, netdev_dpdk_get_features, netdev_dpdk_get_status, + netdev_dpdk_reconfigure, netdev_dpdk_rxq_recv); static const struct netdev_class OVS_UNUSED dpdk_vhost_cuse_class = @@ -2839,12 +2850,12 @@ static const struct netdev_class OVS_UNUSED dpdk_vhost_cuse_class = dpdk_vhost_cuse_class_init, netdev_dpdk_vhost_cuse_construct, netdev_dpdk_vhost_destruct, - netdev_dpdk_vhost_cuse_set_multiq, netdev_dpdk_vhost_send, netdev_dpdk_vhost_get_carrier, netdev_dpdk_vhost_get_stats, NULL, NULL, + netdev_dpdk_vhost_cuse_reconfigure, netdev_dpdk_vhost_rxq_recv); static const struct netdev_class OVS_UNUSED dpdk_vhost_user_class = @@ -2853,12 +2864,12 @@ static const struct netdev_class OVS_UNUSED dpdk_vhost_user_class = dpdk_vhost_user_class_init, netdev_dpdk_vhost_user_construct, netdev_dpdk_vhost_destruct, - netdev_dpdk_vhost_set_multiq, netdev_dpdk_vhost_send, netdev_dpdk_vhost_get_carrier, netdev_dpdk_vhost_get_stats, NULL, NULL, + netdev_dpdk_vhost_user_reconfigure, netdev_dpdk_vhost_rxq_recv); void diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h index 0317910..40afbef 100644 --- a/lib/netdev-provider.h +++ b/lib/netdev-provider.h @@ -67,8 +67,6 @@ struct netdev { * modify them. */ int n_txq; int n_rxq; - /* Number of rx queues requested by user. */ - int requested_n_rxq; int ref_cnt; /* Times this devices was opened. */ struct shash_node *node; /* Pointer to element in global map. */ struct ovs_list saved_flags_list; /* Contains "struct netdev_saved_flags". 
*/ @@ -295,13 +293,8 @@ struct netdev_class { * such info, returns NETDEV_NUMA_UNSPEC. */ int (*get_numa_id)(const struct netdev *netdev); - /* Configures the number of tx queues and rx queues of 'netdev'. - * Return 0 if successful, otherwise a positive errno value. - * - * 'n_rxq' specifies the maximum number of receive queues to create. - * The netdev provider might choose to create less (e.g. if the hardware - * supports only a smaller number). The actual number of queues created - * is stored in the 'netdev->n_rxq' field. + /* Configures the number of tx queues of 'netdev'. Returns 0 if successful, + * otherwise a positive errno value. * * 'n_txq' specifies the exact number of transmission queues to create. * The caller will call netdev_send() concurrently from 'n_txq' different @@ -309,12 +302,12 @@ struct netdev_class { * making sure that these concurrent calls do not create a race condition * by using multiple hw queues or locking. * - * On error, the tx queue and rx queue configuration is indeterminant. - * Caller should make decision on whether to restore the previous or - * the default configuration. Also, caller must make sure there is no - * other thread accessing the queues at the same time. */ - int (*set_multiq)(struct netdev *netdev, unsigned int n_txq, - unsigned int n_rxq); + * The caller will call netdev_reconfigure() (if necessary) before using + * netdev_send() on any of the newly configured queues, giving the provider + * a chance to adjust its settings. + * + * On error, the tx queue configuration is unchanged. */ + int (*set_multiq)(struct netdev *netdev, unsigned int n_txq); /* Sends buffers on 'netdev'. 
* Returns 0 if successful (for every buffer), otherwise a positive errno diff --git a/lib/netdev.c b/lib/netdev.c index 30fd83d..302ba88 100644 --- a/lib/netdev.c +++ b/lib/netdev.c @@ -106,12 +106,6 @@ netdev_n_rxq(const struct netdev *netdev) return netdev->n_rxq; } -int -netdev_requested_n_rxq(const struct netdev *netdev) -{ - return netdev->requested_n_rxq; -} - bool netdev_is_pmd(const struct netdev *netdev) { @@ -384,7 +378,6 @@ netdev_open(const char *name, const char *type, struct netdev **netdevp) /* By default enable one tx and rx queue per netdev. */ netdev->n_txq = netdev->netdev_class->send ? 1 : 0; netdev->n_rxq = netdev->netdev_class->rxq_alloc ? 1 : 0; - netdev->requested_n_rxq = netdev->n_rxq; list_init(&netdev->saved_flags_list); @@ -685,37 +678,30 @@ netdev_rxq_drain(struct netdev_rxq *rx) : 0); } -/* Configures the number of tx queues and rx queues of 'netdev'. - * Return 0 if successful, otherwise a positive errno value. - * - * 'n_rxq' specifies the maximum number of receive queues to create. - * The netdev provider might choose to create less (e.g. if the hardware - * supports only a smaller number). The caller can check how many have been - * actually created by calling 'netdev_n_rxq()' +/* Configures the number of tx queues of 'netdev'. Returns 0 if successful, + * otherwise a positive errno value. * * 'n_txq' specifies the exact number of transmission queues to create. * If this function returns successfully, the caller can make 'n_txq' * concurrent calls to netdev_send() (each one with a different 'qid' in the * range [0..'n_txq'-1]). * - * On error, the tx queue and rx queue configuration is indeterminant. - * Caller should make decision on whether to restore the previous or - * the default configuration. Also, caller must make sure there is no - * other thread accessing the queues at the same time. */ + * The change might not effective immediately. 
The caller must check if a + * reconfiguration is required with netdev_is_reconf_required() and eventually + * call netdev_reconfigure() before using the new queues. + * + * On error, the tx queue configuration is unchanged */ int -netdev_set_multiq(struct netdev *netdev, unsigned int n_txq, - unsigned int n_rxq) +netdev_set_multiq(struct netdev *netdev, unsigned int n_txq) { int error; error = (netdev->netdev_class->set_multiq - ? netdev->netdev_class->set_multiq(netdev, - MAX(n_txq, 1), - MAX(n_rxq, 1)) + ? netdev->netdev_class->set_multiq(netdev, MAX(n_txq, 1)) : EOPNOTSUPP); if (error && error != EOPNOTSUPP) { - VLOG_DBG_RL(&rl, "failed to set tx/rx queue for network device %s:" + VLOG_DBG_RL(&rl, "failed to set tx queue for network device %s:" "%s", netdev_get_name(netdev), ovs_strerror(error)); } diff --git a/lib/netdev.h b/lib/netdev.h index c2a1d6c..bb3d297 100644 --- a/lib/netdev.h +++ b/lib/netdev.h @@ -142,7 +142,6 @@ bool netdev_is_reserved_name(const char *name); int netdev_n_txq(const struct netdev *netdev); int netdev_n_rxq(const struct netdev *netdev); -int netdev_requested_n_rxq(const struct netdev *netdev); bool netdev_is_pmd(const struct netdev *netdev); /* Open and close. */ @@ -168,7 +167,7 @@ const char *netdev_get_type_from_name(const char *); int netdev_get_mtu(const struct netdev *, int *mtup); int netdev_set_mtu(const struct netdev *, int mtu); int netdev_get_ifindex(const struct netdev *); -int netdev_set_multiq(struct netdev *, unsigned int n_txq, unsigned int n_rxq); +int netdev_set_multiq(struct netdev *, unsigned int n_txq); /* Packet reception. */ int netdev_rxq_open(struct netdev *, struct netdev_rxq **, int id); -- 2.1.4 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev