Hi, Daniele. Comment inline.
Best regards, Ilya Maximets. On 28.02.2016 23:13, Daniele Di Proietto wrote: > This introduces in dpif-netdev and netdev-dpdk the first use for the > newly introduce reconfigure netdev call. > > When a request to change the number of queues comes, netdev-dpdk will > remember this and notify the upper layer via > netdev_request_reconfigure(). > > The datapath, instead of periodically calling netdev_set_multiq(), can > detect this and call reconfigure(). > > This mechanism can also be used to: > * Automatically match the number of rxq with the one provided by qemu > via the new_device callback. > * Provide a way to change the MTU of dpdk devices at runtime. > > Signed-off-by: Daniele Di Proietto <diproiet...@vmware.com> > --- > lib/dpif-netdev.c | 69 ++++++++++----------- > lib/netdev-dpdk.c | 167 > +++++++++++++++++++++++++++++--------------------- > lib/netdev-provider.h | 19 ++---- > lib/netdev.c | 30 ++------- > lib/netdev.h | 3 +- > 5 files changed, 139 insertions(+), 149 deletions(-) > > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c > index 2adba89..3421df7 100644 > --- a/lib/dpif-netdev.c > +++ b/lib/dpif-netdev.c > @@ -256,8 +256,6 @@ struct dp_netdev_port { > unsigned n_rxq; /* Number of elements in 'rxq' */ > struct netdev_rxq **rxq; > char *type; /* Port type as requested by user. */ > - int latest_requested_n_rxq; /* Latest requested from netdev number > - of rx queues. */ > }; > > /* Contained by struct dp_netdev_flow's 'stats' member. */ > @@ -1161,8 +1159,7 @@ do_add_port(struct dp_netdev *dp, const char *devname, > const char *type, > /* There can only be ovs_numa_get_n_cores() pmd threads, > * so creates a txq for each, and one extra for the non > * pmd threads. */ > - error = netdev_set_multiq(netdev, n_cores + 1, > - netdev_requested_n_rxq(netdev)); > + error = netdev_set_multiq(netdev, n_cores + 1); > if (error && (error != EOPNOTSUPP)) { > VLOG_ERR("%s, cannot set multiq", devname); > goto out_close; > @@ -1174,7 +1171,14 @@ do_add_port(struct dp_netdev *dp, const char *devname, > const char *type, > port->n_rxq = netdev_n_rxq(netdev); > port->rxq = xmalloc(sizeof *port->rxq * port->n_rxq); > port->type = xstrdup(type); > - port->latest_requested_n_rxq = netdev_requested_n_rxq(netdev); > + > + if (netdev_is_reconf_required(netdev)) { > + error = netdev_reconfigure(netdev); > + if (error) { > + goto out_close; > + } > + } > + > n_open_rxqs = 0; > for (i = 0; i < port->n_rxq; i++) { > error = netdev_rxq_open(netdev, &port->rxq[i], i); > @@ -2450,25 +2454,6 @@ dpif_netdev_operate(struct dpif *dpif, struct dpif_op > **ops, size_t n_ops) > } > } > > -/* Returns true if the configuration for rx queues or cpu mask > - * is changed. */ > -static bool > -pmd_n_rxq_changed(const struct dp_netdev *dp) > -{ > - struct dp_netdev_port *port; > - > - CMAP_FOR_EACH (port, node, &dp->ports) { > - int requested_n_rxq = netdev_requested_n_rxq(port->netdev); > - > - if (netdev_is_pmd(port->netdev) > - && port->latest_requested_n_rxq != requested_n_rxq) { > - return true; > - } > - } > - > - return false; > -} > - > static bool > cmask_equals(const char *a, const char *b) > { > @@ -2601,14 +2586,12 @@ reconfigure_pmd_threads(struct dp_netdev *dp) > dp_netdev_destroy_all_pmds(dp); > > CMAP_FOR_EACH (port, node, &dp->ports) { > - struct netdev *netdev = port->netdev; > - int requested_n_rxq = netdev_requested_n_rxq(netdev); > - if (netdev_is_pmd(port->netdev) > - && port->latest_requested_n_rxq != requested_n_rxq) { > + if (netdev_is_reconf_required(port->netdev)) { > cmap_remove(&dp->ports, &port->node, > hash_odp_port(port->port_no)); > hmapx_add(&to_reconfigure, port); > } > } > + > ovs_mutex_unlock(&dp->port_mutex); > > /* Waits for the other threads to see the ports removed from the cmap, > @@ -2617,11 +2600,9 @@ reconfigure_pmd_threads(struct dp_netdev *dp) > > ovs_mutex_lock(&dp->port_mutex); > HMAPX_FOR_EACH (node, &to_reconfigure) { > - int requested_n_rxq = netdev_requested_n_rxq(port->netdev); > int i, err; > > port = node->data; > - requested_n_rxq = netdev_requested_n_rxq(port->netdev); > /* Closes the existing 'rxq's. */ > for (i = 0; i < port->n_rxq; i++) { > netdev_rxq_close(port->rxq[i]); > @@ -2630,17 +2611,14 @@ reconfigure_pmd_threads(struct dp_netdev *dp) > port->n_rxq = 0; > > /* Sets the new rx queue config. */ > - err = netdev_set_multiq(port->netdev, ovs_numa_get_n_cores() + 1, > - requested_n_rxq); > + err = netdev_reconfigure(port->netdev); > if (err && (err != EOPNOTSUPP)) { > - VLOG_ERR("Failed to set dpdk interface %s rx_queue to: %u", > - netdev_get_name(port->netdev), > - requested_n_rxq); > + VLOG_ERR("Failed to set interface %s new configuration", > + netdev_get_name(port->netdev)); > do_destroy_port(port); > failed_config = true; > continue; > } > - port->latest_requested_n_rxq = requested_n_rxq; > /* If the netdev_reconfigure() above succeeds, reopens the 'rxq's and > * inserts the port back in the cmap, to allow transmitting packets. > */ > port->n_rxq = netdev_n_rxq(port->netdev); > @@ -2671,6 +2649,21 @@ reconfigure_pmd_threads(struct dp_netdev *dp) > dp_netdev_reset_pmd_threads(dp); > } > > +/* Returns true if one of the netdevs in 'dp' requires a reconfiguration */ > +static bool > +ports_require_restart(const struct dp_netdev *dp) > +{ > + struct dp_netdev_port *port; > + > + CMAP_FOR_EACH (port, node, &dp->ports) { > + if (netdev_is_reconf_required(port->netdev)) { > + return true; > + } > + } > + > + return false; > +} > + > /* Return true if needs to revalidate datapath flows. */ > static bool > dpif_netdev_run(struct dpif *dpif) > @@ -2696,7 +2689,7 @@ dpif_netdev_run(struct dpif *dpif) > ovs_mutex_unlock(&dp->non_pmd_mutex); > > if (!cmask_equals(dp->pmd_cmask, dp->requested_pmd_cmask) > - || pmd_n_rxq_changed(dp)) { > + || ports_require_restart(dp)) { > reconfigure_pmd_threads(dp); > } > > @@ -2719,6 +2712,8 @@ dpif_netdev_wait(struct dpif *dpif) > > ovs_mutex_lock(&dp_netdev_mutex); > CMAP_FOR_EACH (port, node, &dp->ports) { > + netdev_wait_reconf_required(port->netdev); > + > if (!netdev_is_pmd(port->netdev)) { > int i; > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index a48ca71..17b8d51 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -237,6 +237,12 @@ struct netdev_dpdk { > > /* In dpdk_list. */ > struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex); > + > + /* The following properties cannot be changed when a device is running, > + * so we remember the request and update them next time > + * netdev_dpdk*_reconfigure() is called */ > + int requested_n_txq; > + int requested_n_rxq; > }; > > struct netdev_rxq_dpdk { > @@ -614,7 +620,8 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int > port_no, > > netdev_->n_txq = NR_QUEUE; > netdev_->n_rxq = NR_QUEUE; > - netdev_->requested_n_rxq = NR_QUEUE; > + netdev->requested_n_rxq = NR_QUEUE; > + netdev->requested_n_txq = NR_QUEUE; > netdev->real_n_txq = NR_QUEUE; > > if (type == DPDK_DEV_ETH) { > @@ -796,7 +803,7 @@ netdev_dpdk_get_config(const struct netdev *netdev, > struct smap *args) > > ovs_mutex_lock(&dev->mutex); > > - smap_add_format(args, "requested_rx_queues", "%d", > netdev->requested_n_rxq); > + smap_add_format(args, "requested_rx_queues", "%d", dev->requested_n_rxq); > smap_add_format(args, "configured_rx_queues", "%d", netdev->n_rxq); > smap_add_format(args, "requested_tx_queues", "%d", netdev->n_txq); > smap_add_format(args, "configured_tx_queues", "%d", dev->real_n_txq); > @@ -809,11 +816,13 @@ static int > netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args) > { > struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); > + int new_n_rxq = MAX(smap_get_int(args, "n_rxq", dev->requested_n_rxq), > 1); This should be done under dev->mutex. > > ovs_mutex_lock(&dev->mutex); > - netdev->requested_n_rxq = MAX(smap_get_int(args, "n_rxq", > - netdev->requested_n_rxq), 1); > - netdev_change_seq_changed(netdev); > + if (new_n_rxq != dev->requested_n_rxq) { > + dev->requested_n_rxq = new_n_rxq; > + netdev_request_reconfigure(netdev); > + } > ovs_mutex_unlock(&dev->mutex); > > return 0; > @@ -827,93 +836,44 @@ netdev_dpdk_get_numa_id(const struct netdev *netdev_) > return netdev->socket_id; > } > > -/* Sets the number of tx queues and rx queues for the dpdk interface. > - * If the configuration fails, do not try restoring its old configuration > - * and just returns the error. */ > +/* Sets the number of tx queues for the dpdk interface. */ > static int > -netdev_dpdk_set_multiq(struct netdev *netdev_, unsigned int n_txq, > - unsigned int n_rxq) > +netdev_dpdk_set_multiq(struct netdev *netdev_, unsigned int n_txq) > { > struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_); > int err = 0; > - int old_rxq, old_txq; > > - if (netdev->up.n_txq == n_txq && netdev->up.n_rxq == n_rxq) { > - return err; > - } > - > - ovs_mutex_lock(&dpdk_mutex); > ovs_mutex_lock(&netdev->mutex); > > - rte_eth_dev_stop(netdev->port_id); > - > - old_txq = netdev->up.n_txq; > - old_rxq = netdev->up.n_rxq; > - netdev->up.n_txq = n_txq; > - netdev->up.n_rxq = n_rxq; > - > - rte_free(netdev->tx_q); > - err = dpdk_eth_dev_init(netdev); > - netdev_dpdk_alloc_txq(netdev, netdev->real_n_txq); > - if (err) { > - /* If there has been an error, it means that the requested queues > - * have not been created. Restore the old numbers. */ > - netdev->up.n_txq = old_txq; > - netdev->up.n_rxq = old_rxq; > + if (netdev->up.n_txq == n_txq) { > + goto out; > } > > - netdev->txq_needs_locking = netdev->real_n_txq != netdev->up.n_txq; > + netdev->requested_n_txq = n_txq; > + netdev_request_reconfigure(netdev_); > > +out: > ovs_mutex_unlock(&netdev->mutex); > - ovs_mutex_unlock(&dpdk_mutex); > - > return err; > + > } > > static int > -netdev_dpdk_vhost_cuse_set_multiq(struct netdev *netdev_, unsigned int n_txq, > - unsigned int n_rxq) > +netdev_dpdk_vhost_set_multiq(struct netdev *netdev_, unsigned int n_txq) > { > struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_); > int err = 0; > > - if (netdev->up.n_txq == n_txq && netdev->up.n_rxq == n_rxq) { > - return err; > - } > - > - ovs_mutex_lock(&dpdk_mutex); > ovs_mutex_lock(&netdev->mutex); > > - netdev->up.n_txq = n_txq; > - netdev->real_n_txq = 1; > - netdev->up.n_rxq = 1; > - netdev->txq_needs_locking = netdev->real_n_txq != netdev->up.n_txq; > - > - ovs_mutex_unlock(&netdev->mutex); > - ovs_mutex_unlock(&dpdk_mutex); > - > - return err; > -} > - > -static int > -netdev_dpdk_vhost_set_multiq(struct netdev *netdev_, unsigned int n_txq, > - unsigned int n_rxq) > -{ > - struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_); > - int err = 0; > - > - if (netdev->up.n_txq == n_txq && netdev->up.n_rxq == n_rxq) { > - return err; > + if (netdev->up.n_txq == n_txq) { > + goto out; > } > > - ovs_mutex_lock(&dpdk_mutex); > - ovs_mutex_lock(&netdev->mutex); > - > - netdev->up.n_txq = n_txq; > - netdev->up.n_rxq = n_rxq; > + netdev->requested_n_txq = n_txq; > > +out: > ovs_mutex_unlock(&netdev->mutex); > - ovs_mutex_unlock(&dpdk_mutex); > > return err; > } > @@ -2239,8 +2199,69 @@ unlock_dpdk: > return err; > } > > -#define NETDEV_DPDK_CLASS(NAME, INIT, CONSTRUCT, DESTRUCT, MULTIQ, SEND, \ > - GET_CARRIER, GET_STATS, GET_FEATURES, GET_STATUS, RXQ_RECV) \ > +static int > +netdev_dpdk_reconfigure(struct netdev *netdev_) > +{ > + struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_); > + int err = 0; > + > + ovs_mutex_lock(&dpdk_mutex); > + ovs_mutex_lock(&netdev->mutex); > + rte_eth_dev_stop(netdev->port_id); > + > + netdev_->n_txq = netdev->requested_n_txq; > + netdev_->n_rxq = netdev->requested_n_rxq; > + > + rte_free(netdev->tx_q); > + err = dpdk_eth_dev_init(netdev); > + netdev_dpdk_alloc_txq(netdev, netdev->real_n_txq); > + > + netdev->txq_needs_locking = netdev->real_n_txq != netdev->up.n_txq; > + > + ovs_mutex_unlock(&netdev->mutex); > + ovs_mutex_unlock(&dpdk_mutex); > + > + return err; > +} > + > +static int > +netdev_dpdk_vhost_user_reconfigure(struct netdev *netdev_) > +{ > + struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_); > + > + ovs_mutex_lock(&dpdk_mutex); > + ovs_mutex_lock(&netdev->mutex); > + > + netdev->up.n_txq = netdev->requested_n_txq; > + netdev->up.n_rxq = netdev->requested_n_rxq; > + > + ovs_mutex_unlock(&netdev->mutex); > + ovs_mutex_unlock(&dpdk_mutex); > + > + return 0; > +} > + > +static int > +netdev_dpdk_vhost_cuse_reconfigure(struct netdev *netdev_) > +{ > + struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_); > + > + ovs_mutex_lock(&dpdk_mutex); > + ovs_mutex_lock(&netdev->mutex); > + > + netdev->up.n_txq = netdev->requested_n_txq; > + netdev->real_n_txq = 1; > + netdev->up.n_rxq = 1; > + netdev->txq_needs_locking = netdev->real_n_txq != netdev->up.n_txq; > + > + ovs_mutex_unlock(&netdev->mutex); > + ovs_mutex_unlock(&dpdk_mutex); > + > + return 0; > +} > + > +#define NETDEV_DPDK_CLASS(NAME, INIT, CONSTRUCT, DESTRUCT, MULTIQ, SEND, > \ > + GET_CARRIER, GET_STATS, GET_FEATURES, GET_STATUS, RECONFIGURE, RXQ_RECV) > \ > { \ > NAME, \ > INIT, /* init */ \ > @@ -2298,7 +2319,7 @@ unlock_dpdk: > NULL, /* arp_lookup */ \ > \ > netdev_dpdk_update_flags, \ > - NULL, /* reconfigure */ \ > + RECONFIGURE, \ > \ > netdev_dpdk_rxq_alloc, \ > netdev_dpdk_rxq_construct, \ > @@ -2419,6 +2440,7 @@ static const struct netdev_class dpdk_class = > netdev_dpdk_get_stats, > netdev_dpdk_get_features, > netdev_dpdk_get_status, > + netdev_dpdk_reconfigure, > netdev_dpdk_rxq_recv); > > static const struct netdev_class dpdk_ring_class = > @@ -2433,6 +2455,7 @@ static const struct netdev_class dpdk_ring_class = > netdev_dpdk_get_stats, > netdev_dpdk_get_features, > netdev_dpdk_get_status, > + netdev_dpdk_reconfigure, > netdev_dpdk_rxq_recv); > > static const struct netdev_class OVS_UNUSED dpdk_vhost_cuse_class = > @@ -2441,12 +2464,13 @@ static const struct netdev_class OVS_UNUSED > dpdk_vhost_cuse_class = > dpdk_vhost_cuse_class_init, > netdev_dpdk_vhost_cuse_construct, > netdev_dpdk_vhost_destruct, > - netdev_dpdk_vhost_cuse_set_multiq, > + netdev_dpdk_vhost_set_multiq, > netdev_dpdk_vhost_send, > netdev_dpdk_vhost_get_carrier, > netdev_dpdk_vhost_get_stats, > NULL, > NULL, > + netdev_dpdk_vhost_cuse_reconfigure, > netdev_dpdk_vhost_rxq_recv); > > static const struct netdev_class OVS_UNUSED dpdk_vhost_user_class = > @@ -2461,6 +2485,7 @@ static const struct netdev_class OVS_UNUSED > dpdk_vhost_user_class = > netdev_dpdk_vhost_get_stats, > NULL, > NULL, > + netdev_dpdk_vhost_user_reconfigure, > netdev_dpdk_vhost_rxq_recv); > > void > diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h > index 9646cca..29b3949 100644 > --- a/lib/netdev-provider.h > +++ b/lib/netdev-provider.h > @@ -67,8 +67,6 @@ struct netdev { > * modify them. */ > int n_txq; > int n_rxq; > - /* Number of rx queues requested by user. */ > - int requested_n_rxq; > int ref_cnt; /* Times this devices was opened. */ > struct shash_node *node; /* Pointer to element in global map. > */ > struct ovs_list saved_flags_list; /* Contains "struct > netdev_saved_flags". */ > @@ -295,13 +293,8 @@ struct netdev_class { > * such info, returns NETDEV_NUMA_UNSPEC. */ > int (*get_numa_id)(const struct netdev *netdev); > > - /* Configures the number of tx queues and rx queues of 'netdev'. > - * Return 0 if successful, otherwise a positive errno value. > - * > - * 'n_rxq' specifies the maximum number of receive queues to create. > - * The netdev provider might choose to create less (e.g. if the hardware > - * supports only a smaller number). The actual number of queues created > - * is stored in the 'netdev->n_rxq' field. > + /* Configures the number of tx queues of 'netdev'. Returns 0 if > successful, > + * otherwise a positive errno value. > * > * 'n_txq' specifies the exact number of transmission queues to create. > * The caller will call netdev_send() concurrently from 'n_txq' different > @@ -309,12 +302,8 @@ struct netdev_class { > * making sure that these concurrent calls do not create a race condition > * by using multiple hw queues or locking. > * > - * On error, the tx queue and rx queue configuration is indeterminant. > - * Caller should make decision on whether to restore the previous or > - * the default configuration. Also, caller must make sure there is no > - * other thread accessing the queues at the same time. */ > - int (*set_multiq)(struct netdev *netdev, unsigned int n_txq, > - unsigned int n_rxq); > + * On error, the tx queue and rx queue configuration is unchanged. */ > + int (*set_multiq)(struct netdev *netdev, unsigned int n_txq); > > /* Sends buffers on 'netdev'. > * Returns 0 if successful (for every buffer), otherwise a positive errno > diff --git a/lib/netdev.c b/lib/netdev.c > index 2c0918b..49c5534 100644 > --- a/lib/netdev.c > +++ b/lib/netdev.c > @@ -106,12 +106,6 @@ netdev_n_rxq(const struct netdev *netdev) > return netdev->n_rxq; > } > > -int > -netdev_requested_n_rxq(const struct netdev *netdev) > -{ > - return netdev->requested_n_rxq; > -} > - > bool > netdev_is_pmd(const struct netdev *netdev) > { > @@ -384,7 +378,6 @@ netdev_open(const char *name, const char *type, struct > netdev **netdevp) > /* By default enable one tx and rx queue per netdev. */ > netdev->n_txq = netdev->netdev_class->send ? 1 : 0; > netdev->n_rxq = netdev->netdev_class->rxq_alloc ? 1 : 0; > - netdev->requested_n_rxq = netdev->n_rxq; > > list_init(&netdev->saved_flags_list); > > @@ -685,37 +678,26 @@ netdev_rxq_drain(struct netdev_rxq *rx) > : 0); > } > > -/* Configures the number of tx queues and rx queues of 'netdev'. > - * Return 0 if successful, otherwise a positive errno value. > - * > - * 'n_rxq' specifies the maximum number of receive queues to create. > - * The netdev provider might choose to create less (e.g. if the hardware > - * supports only a smaller number). The caller can check how many have been > - * actually created by calling 'netdev_n_rxq()' > +/* Configures the number of tx queues of 'netdev'. Returns 0 if successful, > + * otherwise a positive errno value. > * > * 'n_txq' specifies the exact number of transmission queues to create. > * If this function returns successfully, the caller can make 'n_txq' > * concurrent calls to netdev_send() (each one with a different 'qid' in the > * range [0..'n_txq'-1]). > * > - * On error, the tx queue and rx queue configuration is indeterminant. > - * Caller should make decision on whether to restore the previous or > - * the default configuration. Also, caller must make sure there is no > - * other thread accessing the queues at the same time. */ > + * On error, the tx queue and rx queue configuration is unchanged */ > int > -netdev_set_multiq(struct netdev *netdev, unsigned int n_txq, > - unsigned int n_rxq) > +netdev_set_multiq(struct netdev *netdev, unsigned int n_txq) > { > int error; > > error = (netdev->netdev_class->set_multiq > - ? netdev->netdev_class->set_multiq(netdev, > - MAX(n_txq, 1), > - MAX(n_rxq, 1)) > + ? netdev->netdev_class->set_multiq(netdev, MAX(n_txq, 1)) > : EOPNOTSUPP); > > if (error && error != EOPNOTSUPP) { > - VLOG_DBG_RL(&rl, "failed to set tx/rx queue for network device %s:" > + VLOG_DBG_RL(&rl, "failed to set tx queue for network device %s:" > "%s", netdev_get_name(netdev), ovs_strerror(error)); > } > > diff --git a/lib/netdev.h b/lib/netdev.h > index c2a1d6c..bb3d297 100644 > --- a/lib/netdev.h > +++ b/lib/netdev.h > @@ -142,7 +142,6 @@ bool netdev_is_reserved_name(const char *name); > > int netdev_n_txq(const struct netdev *netdev); > int netdev_n_rxq(const struct netdev *netdev); > -int netdev_requested_n_rxq(const struct netdev *netdev); > bool netdev_is_pmd(const struct netdev *netdev); > > /* Open and close. */ > @@ -168,7 +167,7 @@ const char *netdev_get_type_from_name(const char *); > int netdev_get_mtu(const struct netdev *, int *mtup); > int netdev_set_mtu(const struct netdev *, int mtu); > int netdev_get_ifindex(const struct netdev *); > -int netdev_set_multiq(struct netdev *, unsigned int n_txq, unsigned int > n_rxq); > +int netdev_set_multiq(struct netdev *, unsigned int n_txq); > > /* Packet reception. */ > int netdev_rxq_open(struct netdev *, struct netdev_rxq **, int id); > _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev