Dealing with CPU masks can be confusing and unnecessary for simple configurations. This commit introduces the 'other_config:n-pmd-cores' key to specify the desired number of CPU cores reserved for the PMD threads. The 'other_config:pmd-cpu-mask' (if specified) overrides this parameter.
Signed-off-by: Daniele Di Proietto <diproiet...@vmware.com> --- lib/dpif-netdev.c | 122 +++++++++++++++++++++++++++++++-------------- lib/dpif-provider.h | 7 ++- lib/dpif.c | 6 ++- lib/dpif.h | 2 +- lib/ovs-numa.c | 15 ++---- lib/ovs-numa.h | 8 +-- ofproto/ofproto-dpif.c | 4 +- ofproto/ofproto-provider.h | 2 + ofproto/ofproto.c | 7 +++ ofproto/ofproto.h | 1 + vswitchd/bridge.c | 2 + vswitchd/vswitch.xml | 31 +++++++++++- 12 files changed, 146 insertions(+), 61 deletions(-) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index e632614..963d99e 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -209,10 +209,15 @@ struct dp_netdev { * 'struct dp_netdev_pmd_thread' in 'per_pmd_key'. */ ovsthread_key_t per_pmd_key; - /* Number of rx queues for each dpdk interface and the cpu mask - * for pin of pmd threads. */ + /* Number of rx queues for each dpdk interface */ size_t n_dpdk_rxqs; + /* Maximum number of PMD threads. Ignored if 'pmd_cmask' != NULL */ + int n_pmd_threads; + /* CPU cores used for PMD threads. There will be one thread for each core + * set in the CPU mask. 
*/ char *pmd_cmask; + /* Non pmd threads will be restricted to use the CPU set specified by this + * mask */ char *nonpmd_cmask; uint64_t last_tnl_conf_seq; }; @@ -435,13 +440,13 @@ static struct dp_netdev_pmd_thread *dp_netdev_get_pmd(struct dp_netdev *dp, static struct dp_netdev_pmd_thread * dp_netdev_pmd_get_next(struct dp_netdev *dp, struct cmap_position *pos); static void dp_netdev_destroy_all_pmds(struct dp_netdev *dp); -static void dp_netdev_del_pmds_on_numa(struct dp_netdev *dp, int numa_id); -static void dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int numa_id); +static void dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int numa_id, int); static void dp_netdev_reset_pmd_threads(struct dp_netdev *dp); static void dp_netdev_del_pmd(struct dp_netdev_pmd_thread *pmd); static bool dp_netdev_pmd_try_ref(struct dp_netdev_pmd_thread *pmd); static void dp_netdev_pmd_unref(struct dp_netdev_pmd_thread *pmd); static void dp_netdev_pmd_flow_flush(struct dp_netdev_pmd_thread *pmd); +static int get_n_pmd_threads_on_numa(struct dp_netdev *dp, int numa_id); static inline bool emc_entry_alive(struct emc_entry *ce); static void emc_clear_entry(struct emc_entry *ce); @@ -624,12 +629,13 @@ create_dp_netdev(const char *name, const struct dpif_class *class, ovs_mutex_init_recursive(&dp->non_pmd_mutex); ovsthread_key_create(&dp->per_pmd_key, NULL); - /* Reserves the core NON_PMD_CORE_ID for all non-pmd threads. */ - ovs_numa_try_pin_core_specific(NON_PMD_CORE_ID); + /* There can never be a pmd thread on NON_PMD_CORE_ID. 
*/ + ovs_numa_core_disable_pmd(NON_PMD_CORE_ID); non_pmd = xzalloc(sizeof *non_pmd); dp_netdev_configure_pmd(non_pmd, dp, 0, NON_PMD_CORE_ID, OVS_NUMA_UNSPEC); dp->n_dpdk_rxqs = NR_QUEUE; + dp->n_pmd_threads = NR_PMD_THREADS; ovs_mutex_lock(&dp->port_mutex); error = do_add_port(dp, name, "internal", ODPP_LOCAL); @@ -905,9 +911,20 @@ do_add_port(struct dp_netdev *dp, const char *devname, const char *type, cmap_insert(&dp->ports, &port->node, hash_port_no(port_no)); if (netdev_is_pmd(netdev)) { - dp_netdev_set_pmds_on_numa(dp, netdev_get_numa_id(netdev)); - dp_netdev_reload_pmds(dp); - dp_netdev_set_nonpmd_affinity(); + int dev_numa_id = netdev_get_numa_id(netdev); + + if (!get_n_pmd_threads_on_numa(dp, dev_numa_id)) { + /* There weren't pmd threads on numa domain 'dev_numa_id'. + * Reset all the pmd threads to distribute the pmd threads + * among numa domains */ + dp_netdev_destroy_all_pmds(dp); + dp_netdev_reset_pmd_threads(dp); + } else { + /* There are already pmd threads on numa domain 'dev_numa_id'. + * Redistribute the queues */ + dp_netdev_reload_pmds(dp); + dp_netdev_set_nonpmd_affinity(); + } } seq_change(dp->port_seq); @@ -1094,7 +1111,8 @@ do_del_port(struct dp_netdev *dp, struct dp_netdev_port *port) /* If there is no netdev on the numa node, deletes the pmd threads * for that numa. Else, just reloads the queues. */ if (!has_pmd_port_for_numa(dp, numa_id)) { - dp_netdev_del_pmds_on_numa(dp, numa_id); + dp_netdev_destroy_all_pmds(dp); + dp_netdev_reset_pmd_threads(dp); } dp_netdev_reload_pmds(dp); } @@ -2139,17 +2157,25 @@ dpif_netdev_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops) * is changed. 
*/ static bool pmd_config_changed(const struct dp_netdev *dp, size_t rxqs, - const char *cmask_pmd) + const char *cmask_pmd, int n_pmd_cores) { if (dp->n_dpdk_rxqs != rxqs) { return true; - } else { - if (dp->pmd_cmask != NULL && cmask_pmd != NULL) { - return strcmp(dp->pmd_cmask, cmask_pmd); - } else { - return (dp->pmd_cmask != NULL || cmask_pmd != NULL); - } } + + if (dp->pmd_cmask != NULL && cmask_pmd != NULL) { + return strcmp(dp->pmd_cmask, cmask_pmd); + } + + if (dp->pmd_cmask != NULL || cmask_pmd != NULL) { + return true; + } + + if (dp->pmd_cmask == NULL) { + return dp->n_pmd_threads != n_pmd_cores; + } + + return false; } /* Returns true if the configuration for nonpmd cpu mask is changed */ @@ -2166,11 +2192,12 @@ nonpmd_config_changed(const struct dp_netdev *dp, const char *cmask_nonpmd) /* Resets pmd threads if the configuration for 'rxq's or cpu mask changes. */ static int dpif_netdev_pmd_set(struct dpif *dpif, unsigned int n_rxqs, - const char *cmask_pmd, const char *cmask_nonpmd) + int n_pmd_cores, const char *cmask_pmd, + const char *cmask_nonpmd) { struct dp_netdev *dp = get_dp_netdev(dpif); - if (pmd_config_changed(dp, n_rxqs, cmask_pmd)) { + if (pmd_config_changed(dp, n_rxqs, cmask_pmd, n_pmd_cores)) { struct dp_netdev_port *port; dp_netdev_destroy_all_pmds(dp); @@ -2204,6 +2231,7 @@ dpif_netdev_pmd_set(struct dpif *dpif, unsigned int n_rxqs, } } dp->n_dpdk_rxqs = n_rxqs; + dp->n_pmd_threads = n_pmd_cores; /* Reconfigures the cpu mask. */ ovs_numa_set_cpu_mask_pmd(cmask_pmd); @@ -2212,6 +2240,7 @@ dpif_netdev_pmd_set(struct dpif *dpif, unsigned int n_rxqs, free(dp->nonpmd_cmask); dp->pmd_cmask = cmask_pmd ? xstrdup(cmask_pmd) : NULL; dp->nonpmd_cmask = cmask_nonpmd ? xstrdup(cmask_nonpmd) : NULL; + ovs_numa_core_disable_pmd(NON_PMD_CORE_ID); /* Restores all pmd threads. */ dp_netdev_reset_pmd_threads(dp); @@ -2625,23 +2654,10 @@ dp_netdev_destroy_all_pmds(struct dp_netdev *dp) } } -/* Deletes all pmd threads on numa node 'numa_id'. 
*/ -static void -dp_netdev_del_pmds_on_numa(struct dp_netdev *dp, int numa_id) -{ - struct dp_netdev_pmd_thread *pmd; - - CMAP_FOR_EACH (pmd, node, &dp->poll_threads) { - if (pmd->numa_id == numa_id) { - dp_netdev_del_pmd(pmd); - } - } -} - /* Checks the numa node id of 'netdev' and starts pmd threads for * the numa node. */ static void -dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int numa_id) +dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int numa_id, int nr_threads) { int n_pmds; @@ -2667,8 +2683,9 @@ dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int numa_id) } /* If cpu mask is specified, uses all unpinned cores, otherwise - * tries creating NR_PMD_THREADS pmd threads. */ - can_have = dp->pmd_cmask ? n_unpinned : MIN(n_unpinned, NR_PMD_THREADS); + * tries creating 'nr_threads' pmd threads. */ + can_have = dp->pmd_cmask ? n_unpinned : MIN(n_unpinned, nr_threads); + for (i = 0; i < can_have; i++) { struct dp_netdev_pmd_thread *pmd = xzalloc(sizeof *pmd); int core_id = ovs_numa_get_unpinned_core_on_numa(numa_id); @@ -2689,15 +2706,46 @@ static void dp_netdev_reset_pmd_threads(struct dp_netdev *dp) { struct dp_netdev_port *port; + int max_numa = ovs_numa_get_n_numas(); + unsigned long *numabitmap; + int numa_id, nr_numa; + + if (max_numa < 1) { + max_numa = 1; + } + + numabitmap = bitmap_allocate(max_numa); CMAP_FOR_EACH (port, node, &dp->ports) { if (netdev_is_pmd(port->netdev)) { - int numa_id = netdev_get_numa_id(port->netdev); + numa_id = netdev_get_numa_id(port->netdev); - dp_netdev_set_pmds_on_numa(dp, numa_id); + bitmap_set1(numabitmap, numa_id); + } + } + + nr_numa = bitmap_count1(numabitmap, max_numa); + if (nr_numa) { + int n_threads_per_numa, n_threads_remainder; + + if (dp->n_pmd_threads == 0) { + /* Default: just create one pmd threads per numa node */ + n_threads_per_numa = 1; + n_threads_remainder = 0; + } else { + n_threads_per_numa = dp->n_pmd_threads / nr_numa; + n_threads_remainder = dp->n_pmd_threads % nr_numa; + } + + 
BITMAP_FOR_EACH_1(numa_id, max_numa, numabitmap) { + dp_netdev_set_pmds_on_numa(dp, numa_id, n_threads_per_numa + + n_threads_remainder); + n_threads_remainder = 0; } } dp_netdev_set_nonpmd_affinity(); + + bitmap_free(numabitmap); } static char * diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h index e54817d..41dcde4 100644 --- a/lib/dpif-provider.h +++ b/lib/dpif-provider.h @@ -310,9 +310,12 @@ struct dpif_class { * configuration. 'n_rxqs' configures the number of rx_queues, which * are distributed among threads. 'cmask_pmd' configures the cpu mask * for setting the polling threads' cpu affinity. 'cmask_nonpmd' - * configures the cpumask of the remaining OVS threads */ + * configures the cpumask of the remaining OVS threads. + * If 'cmask_pmd' is NULL, 'n_pmd_cores' cores will be used, + * otherwise 'n_pmd_cores' will be ignored */ int (*poll_threads_set)(struct dpif *dpif, unsigned int n_rxqs, - const char *cmask_pmd, const char *cmask_nonpmd); + int n_pmd_cores, const char *cmask_pmd, + const char *cmask_nonpmd); /* Translates OpenFlow queue ID 'queue_id' (in host byte order) into a * priority value used for setting packet priority. */ diff --git a/lib/dpif.c b/lib/dpif.c index 32bc005..cbb7399 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -1383,12 +1383,14 @@ dpif_print_packet(struct dpif *dpif, struct dpif_upcall *upcall) * configuration. 
*/ int dpif_poll_threads_set(struct dpif *dpif, unsigned int n_rxqs, - const char *cmask, const char *cmask_nonpmd) + int n_pmd_cores, const char *cmask, + const char *cmask_nonpmd) { int error = 0; if (dpif->dpif_class->poll_threads_set) { - error = dpif->dpif_class->poll_threads_set(dpif, n_rxqs, cmask, + error = dpif->dpif_class->poll_threads_set(dpif, n_rxqs, + n_pmd_cores, cmask, cmask_nonpmd); if (error) { log_operation(dpif, "poll_threads_set", error); diff --git a/lib/dpif.h b/lib/dpif.h index 68774bf..4ee1a69 100644 --- a/lib/dpif.h +++ b/lib/dpif.h @@ -819,7 +819,7 @@ void dpif_register_upcall_cb(struct dpif *, upcall_callback *, void *aux); int dpif_recv_set(struct dpif *, bool enable); int dpif_handlers_set(struct dpif *, uint32_t n_handlers); -int dpif_poll_threads_set(struct dpif *, unsigned int n_rxqs, +int dpif_poll_threads_set(struct dpif *, unsigned int n_rxqs, int n_pmd_cores, const char *cmask, const char *cmask_nonpmd); int dpif_recv(struct dpif *, uint32_t handler_id, struct dpif_upcall *, struct ofpbuf *); diff --git a/lib/ovs-numa.c b/lib/ovs-numa.c index 3b432f1..8c6a6e1 100644 --- a/lib/ovs-numa.c +++ b/lib/ovs-numa.c @@ -287,22 +287,15 @@ ovs_numa_get_n_unpinned_cores_on_numa(int numa_id) return OVS_CORE_UNSPEC; } -/* Given 'core_id', tries to pin that core. Returns true, if succeeds. - * False, if the core has already been pinned, or if it is invalid or - * not available. */ -bool -ovs_numa_try_pin_core_specific(int core_id) +/* Removes the core 'core_id' from the pmd cpu mask */ +void +ovs_numa_core_disable_pmd(int core_id) { struct cpu_core *core = get_core_by_core_id(core_id); if (core) { - if (core->available_pmd && !core->pinned) { - core->pinned = true; - return true; - } + core->available_pmd = false; } - - return false; } /* Searches through all cores for an unpinned and available core. 
Returns diff --git a/lib/ovs-numa.h b/lib/ovs-numa.h index 04f598a..ea7a759 100644 --- a/lib/ovs-numa.h +++ b/lib/ovs-numa.h @@ -52,7 +52,7 @@ int ovs_numa_get_n_cores(void); int ovs_numa_get_numa_id(int core_id); int ovs_numa_get_n_cores_on_numa(int numa_id); int ovs_numa_get_n_unpinned_cores_on_numa(int numa_id); -bool ovs_numa_try_pin_core_specific(int core_id); +void ovs_numa_core_disable_pmd(int core_id); int ovs_numa_get_unpinned_core_any(void); int ovs_numa_get_unpinned_core_on_numa(int numa_id); void ovs_numa_unpin_core(int core_id); @@ -131,10 +131,10 @@ ovs_numa_get_n_unpinned_cores_on_numa(int numa_id OVS_UNUSED) return OVS_CORE_UNSPEC; } -static inline bool -ovs_numa_try_pin_core_specific(int core_id OVS_UNUSED) +static inline void +ovs_numa_core_disable_pmd(int core_id OVS_UNUSED) { - return false; + /* Nothing */ } static inline int diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 4b45bb2..87106a5 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -557,8 +557,8 @@ type_run(const char *type) udpif_set_threads(backer->udpif, n_handlers, n_revalidators); } - dpif_poll_threads_set(backer->dpif, n_dpdk_rxqs, pmd_cpu_mask, - nonpmd_cpu_mask); + dpif_poll_threads_set(backer->dpif, n_dpdk_rxqs, n_pmd_cores, + pmd_cpu_mask, nonpmd_cpu_mask); if (backer->need_revalidate) { struct ofproto_dpif *ofproto; diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h index 54f358d..61a137b 100644 --- a/ofproto/ofproto-provider.h +++ b/ofproto/ofproto-provider.h @@ -457,6 +457,8 @@ extern size_t n_handlers, n_revalidators; /* Number of rx queues to be created for each dpdk interface. */ extern size_t n_dpdk_rxqs; +/* Maximum number of pmd threads. Ignored if 'pmd_cpu_mask' != NULL */ +extern int n_pmd_cores; /* Cpu mask for pmd threads. 
*/ extern char *pmd_cpu_mask; extern char *nonpmd_cpu_mask; diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index 4ee7e37..3601ae6 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -305,6 +305,7 @@ unsigned ofproto_max_idle = OFPROTO_MAX_IDLE_DEFAULT; size_t n_handlers, n_revalidators; size_t n_dpdk_rxqs; +int n_pmd_cores; char *pmd_cpu_mask; char *nonpmd_cpu_mask; @@ -742,6 +743,12 @@ ofproto_set_n_dpdk_rxqs(int n_rxqs) } void +ofproto_set_n_pmd_cores(int n_cores) +{ + n_pmd_cores = MAX(n_cores, 0); +} + +void ofproto_set_pmd_cpu_mask(const char *cmask) { free(pmd_cpu_mask); diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h index bc7359b..3dada59 100644 --- a/ofproto/ofproto.h +++ b/ofproto/ofproto.h @@ -317,6 +317,7 @@ int ofproto_port_set_mcast_snooping(struct ofproto *ofproto, void *aux, const struct ofproto_mcast_snooping_port_settings *s); void ofproto_set_threads(int n_handlers, int n_revalidators); void ofproto_set_n_dpdk_rxqs(int n_rxqs); +void ofproto_set_n_pmd_cores(int n_cores); void ofproto_set_pmd_cpu_mask(const char *cmask); void ofproto_set_nonpmd_cpu_mask(const char *cmask); void ofproto_set_dp_desc(struct ofproto *, const char *dp_desc); diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index 24f780c..5831afb 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -572,6 +572,8 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg) OFPROTO_MAX_IDLE_DEFAULT)); ofproto_set_n_dpdk_rxqs(smap_get_int(&ovs_cfg->other_config, "n-dpdk-rxqs", 0)); + ofproto_set_n_pmd_cores(smap_get_int(&ovs_cfg->other_config, + "n-pmd-cores", 0)); ofproto_set_pmd_cpu_mask(smap_get(&ovs_cfg->other_config, "pmd-cpu-mask")); ofproto_set_nonpmd_cpu_mask(smap_get(&ovs_cfg->other_config, diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index b7388b4..b7dc26f 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -161,6 +161,24 @@ </p> </column> + <column name="other_config" key="n-pmd-cores" + type='{"type": "integer", "minInteger": 0}'> + 
<p> + Specifies the maximum number of cores that the userspace datapath is + allowed to use to process packets (i.e. the maximum number of PMD + threads that will be created by OVS). The special value 0 means + that OVS will use a single core per NUMA domain. + </p> + <p> + If ``other_config:pmd-cpu-mask'' is specified, this value will be + ignored and the CPU mask will be honored. + </p> + <p> + The default is 0. It means that OVS will use only one core per + NUMA domain to process packets in the userspace datapath. + </p> + </column> + <column name="other_config" key="pmd-cpu-mask"> <p> Specifies CPU mask for setting the cpu affinity of PMD (Poll @@ -175,8 +193,13 @@ those uncovered cores are considered not set. </p> <p> - If not specified, one pmd thread will be created for each numa node - and pinned to any available core on the numa node by default. + Please note that core 0 (NON_PMD_CORE_ID in the code) is reserved + and will never be used for a PMD thread. If set in the mask, it will + be ignored. + </p> + <p> + If not specified, the ``other_config:n-pmd-cores'' key will be honored. + If it is specified, ``other_config:n-pmd-cores'' will be ignored. + </p> </column> @@ -195,6 +218,10 @@ the cores used for PMD threads operations. </p> <p> + Please note that core 0 (NON_PMD_CORE_ID in the code) is always used + for non PMD threads, even if unset in this mask. + </p> + <p> If not specified, the non PMD threads will be bound to every core not used for pmd operations. </p> -- 2.1.4 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev