This commit introduces the 'other_config:nonpmd-cpu-mask' key to control the CPU affinity of non-PMD threads. The mask uses the same hex format as the existing 'pmd-cpu-mask' key; any core currently pinned by a PMD thread is excluded from the resulting affinity set.
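As an illustration (the core count and mask values below are only an example, not part of this change), on a 4-core machine the PMD threads could be confined to cores 2-3 and every other OVS thread to cores 0-1 with:

    ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=c
    ovs-vsctl set Open_vSwitch . other_config:nonpmd-cpu-mask=3

Clearing 'nonpmd-cpu-mask' restores the previous behaviour: non-PMD threads may run on any core that is not used by a PMD thread.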
Signed-off-by: Daniele Di Proietto <diproiet...@vmware.com> --- lib/dpif-netdev.c | 62 +++++++++++++++++++++++++++++----- lib/dpif-provider.h | 5 +-- lib/dpif.c | 5 +-- lib/dpif.h | 2 +- lib/ovs-numa.c | 83 ++++++++++++++++++++++++++++++++++++---------- lib/ovs-numa.h | 21 ++++++++++-- ofproto/ofproto-dpif.c | 3 +- ofproto/ofproto-provider.h | 1 + ofproto/ofproto.c | 11 +++++- ofproto/ofproto.h | 3 +- vswitchd/bridge.c | 5 ++- vswitchd/vswitch.xml | 20 +++++++++++ 12 files changed, 184 insertions(+), 37 deletions(-) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index f01fecb..1657621 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -213,6 +213,7 @@ struct dp_netdev { * for pin of pmd threads. */ size_t n_dpdk_rxqs; char *pmd_cmask; + char *nonpmd_cmask; uint64_t last_tnl_conf_seq; }; @@ -422,6 +423,7 @@ static void dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd, static void dp_netdev_input(struct dp_netdev_pmd_thread *, struct dp_packet **, int cnt); +static void *pmd_thread_main(void *); static void dp_netdev_disable_upcall(struct dp_netdev *); void dp_netdev_pmd_reload_done(struct dp_netdev_pmd_thread *pmd); static void dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, @@ -704,6 +706,7 @@ dp_netdev_free(struct dp_netdev *dp) dp_netdev_destroy_upcall_lock(dp); free(dp->pmd_cmask); + free(dp->nonpmd_cmask); free(CONST_CAST(char *, dp->name)); free(dp); } @@ -766,6 +769,25 @@ dpif_netdev_get_stats(const struct dpif *dpif, struct dpif_dp_stats *stats) } static void +dp_netdev_set_nonpmd_affinity(void) +{ + cpu_set_t nonpmdset; + + if (ovs_numa_get_non_pmd_free_set(sizeof nonpmdset, &nonpmdset)) { + struct ovsthread *t; + CPU_SET(NON_PMD_CORE_ID, &nonpmdset); + + ovs_mutex_lock(&ovsthread_list_mutex); + LIST_FOR_EACH(t, list_node, &ovsthread_list) { + if (t->start != pmd_thread_main) { + pthread_setaffinity_np(t->thread, sizeof nonpmdset, &nonpmdset); + } + } + ovs_mutex_unlock(&ovsthread_list_mutex); + } +} + +static void dp_netdev_reload_pmd__(struct dp_netdev_pmd_thread *pmd) { int old_seq; @@ -881,6 +903,7 @@ do_add_port(struct dp_netdev *dp, const char *devname, const char *type, if (netdev_is_pmd(netdev)) { dp_netdev_set_pmds_on_numa(dp, netdev_get_numa_id(netdev)); dp_netdev_reload_pmds(dp); + dp_netdev_set_nonpmd_affinity(); } seq_change(dp->port_seq); @@ -2111,26 +2134,39 @@ dpif_netdev_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops) /* Returns true if the configuration for rx queues or cpu mask * is changed. */ static bool -pmd_config_changed(const struct dp_netdev *dp, size_t rxqs, const char *cmask) +pmd_config_changed(const struct dp_netdev *dp, size_t rxqs, + const char *cmask_pmd) { if (dp->n_dpdk_rxqs != rxqs) { return true; } else { - if (dp->pmd_cmask != NULL && cmask != NULL) { - return strcmp(dp->pmd_cmask, cmask); + if (dp->pmd_cmask != NULL && cmask_pmd != NULL) { + return strcmp(dp->pmd_cmask, cmask_pmd); } else { - return (dp->pmd_cmask != NULL || cmask != NULL); + return (dp->pmd_cmask != NULL || cmask_pmd != NULL); } } } +/* Returns true if the configuration for nonpmd cpu mask is changed */ +static bool +nonpmd_config_changed(const struct dp_netdev *dp, const char *cmask_nonpmd) +{ + if (dp->nonpmd_cmask != NULL && cmask_nonpmd != NULL) { + return strcmp(dp->nonpmd_cmask, cmask_nonpmd); + } else { + return (dp->nonpmd_cmask != NULL || cmask_nonpmd != NULL); + } +} + /* Resets pmd threads if the configuration for 'rxq's or cpu mask changes. 
*/ static int -dpif_netdev_pmd_set(struct dpif *dpif, unsigned int n_rxqs, const char *cmask) +dpif_netdev_pmd_set(struct dpif *dpif, unsigned int n_rxqs, + const char *cmask_pmd, const char *cmask_nonpmd) { struct dp_netdev *dp = get_dp_netdev(dpif); - if (pmd_config_changed(dp, n_rxqs, cmask)) { + if (pmd_config_changed(dp, n_rxqs, cmask_pmd)) { struct dp_netdev_port *port; dp_netdev_destroy_all_pmds(dp); @@ -2166,14 +2202,23 @@ dpif_netdev_pmd_set(struct dpif *dpif, unsigned int n_rxqs, const char *cmask) dp->n_dpdk_rxqs = n_rxqs; /* Reconfigures the cpu mask. */ - ovs_numa_set_cpu_mask(cmask); + ovs_numa_set_cpu_mask_pmd(cmask_pmd); + ovs_numa_set_cpu_mask_nonpmd(cmask_nonpmd); free(dp->pmd_cmask); - dp->pmd_cmask = cmask ? xstrdup(cmask) : NULL; + free(dp->nonpmd_cmask); + dp->pmd_cmask = cmask_pmd ? xstrdup(cmask_pmd) : NULL; + dp->nonpmd_cmask = cmask_nonpmd ? xstrdup(cmask_nonpmd) : NULL; /* Restores the non-pmd. */ dp_netdev_set_nonpmd(dp); /* Restores all pmd threads. */ dp_netdev_reset_pmd_threads(dp); + } else if (nonpmd_config_changed(dp, cmask_nonpmd)) { + free(dp->nonpmd_cmask); + dp->nonpmd_cmask = cmask_nonpmd ? xstrdup(cmask_nonpmd) : NULL; + ovs_numa_set_cpu_mask_nonpmd(cmask_nonpmd); + + dp_netdev_set_nonpmd_affinity(); } return 0; @@ -2657,6 +2702,7 @@ dp_netdev_reset_pmd_threads(struct dp_netdev *dp) dp_netdev_set_pmds_on_numa(dp, numa_id); } } + dp_netdev_set_nonpmd_affinity(); } static char * diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h index 7b4878eb..3612766 100644 --- a/lib/dpif-provider.h +++ b/lib/dpif-provider.h @@ -309,9 +309,10 @@ struct dpif_class { /* If 'dpif' creates its own I/O polling threads, refreshes poll threads * configuration. 'n_rxqs' configures the number of rx_queues, which * are distributed among threads. 'cmask' configures the cpu mask - * for setting the polling threads' cpu affinity. */ + * for setting the polling threads' cpu affinity. 'cmask_nonpmd' + * configures the cpumask of the remaining OVS threads */ int (*poll_threads_set)(struct dpif *dpif, unsigned int n_rxqs, - const char *cmask); + const char *cmask, const char *cmask_nonpmd); /* Translates OpenFlow queue ID 'queue_id' (in host byte order) into a * priority value used for setting packet priority. */ diff --git a/lib/dpif.c b/lib/dpif.c index ee71774..32bc005 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -1383,12 +1383,13 @@ dpif_print_packet(struct dpif *dpif, struct dpif_upcall *upcall) * configuration. 
*/ int dpif_poll_threads_set(struct dpif *dpif, unsigned int n_rxqs, - const char *cmask) + const char *cmask, const char *cmask_nonpmd) { int error = 0; if (dpif->dpif_class->poll_threads_set) { - error = dpif->dpif_class->poll_threads_set(dpif, n_rxqs, cmask); + error = dpif->dpif_class->poll_threads_set(dpif, n_rxqs, cmask, + cmask_nonpmd); if (error) { log_operation(dpif, "poll_threads_set", error); } diff --git a/lib/dpif.h b/lib/dpif.h index 06c6525..68774bf 100644 --- a/lib/dpif.h +++ b/lib/dpif.h @@ -820,7 +820,7 @@ void dpif_register_upcall_cb(struct dpif *, upcall_callback *, void *aux); int dpif_recv_set(struct dpif *, bool enable); int dpif_handlers_set(struct dpif *, uint32_t n_handlers); int dpif_poll_threads_set(struct dpif *, unsigned int n_rxqs, - const char *cmask); + const char *cmask, const char *cmask_nonpmd); int dpif_recv(struct dpif *, uint32_t handler_id, struct dpif_upcall *, struct ofpbuf *); void dpif_recv_purge(struct dpif *); diff --git a/lib/ovs-numa.c b/lib/ovs-numa.c index 3aa1036..3b432f1 100644 --- a/lib/ovs-numa.c +++ b/lib/ovs-numa.c @@ -71,8 +71,9 @@ struct cpu_core { struct ovs_list list_node; /* In 'numa_node->cores' list. */ struct numa_node *numa; /* numa node containing the core. */ int core_id; /* Core id. */ - bool available; /* If the core can be pinned. */ + bool available_pmd; /* If the core can be pinned. */ bool pinned; /* If a thread has been pinned to the core. */ + bool available_nonpmd; /* If the core is available for nonpmd threads. */ }; /* Contains all 'struct numa_node's. */ @@ -126,7 +127,8 @@ discover_numa_and_core(void) list_insert(&n->cores, &c->list_node); c->core_id = core_id; c->numa = n; - c->available = true; + c->available_pmd = true; + c->available_nonpmd = true; n_cpus++; } } @@ -262,8 +264,9 @@ ovs_numa_get_n_cores_on_numa(int numa_id) return OVS_CORE_UNSPEC; } -/* Returns the number of cpu cores that are available and unpinned - * on numa node. Returns OVS_CORE_UNSPEC if 'numa_id' is invalid. */ +/* Returns the number of cpu cores that are available to be pinned + * (and currently unpinned) on numa node. + * Returns OVS_CORE_UNSPEC if 'numa_id' is invalid. */ int ovs_numa_get_n_unpinned_cores_on_numa(int numa_id) { @@ -274,7 +277,7 @@ ovs_numa_get_n_unpinned_cores_on_numa(int numa_id) int count = 0; LIST_FOR_EACH(core, list_node, &numa->cores) { - if (core->available && !core->pinned) { + if (core->available_pmd && !core->pinned) { count++; } } @@ -293,7 +296,7 @@ ovs_numa_try_pin_core_specific(int core_id) struct cpu_core *core = get_core_by_core_id(core_id); if (core) { - if (core->available && !core->pinned) { + if (core->available_pmd && !core->pinned) { core->pinned = true; return true; } @@ -311,7 +314,7 @@ ovs_numa_get_unpinned_core_any(void) struct cpu_core *core; HMAP_FOR_EACH(core, hmap_node, &all_cpu_cores) { - if (core->available && !core->pinned) { + if (core->available_pmd && !core->pinned) { core->pinned = true; return core->core_id; } @@ -332,7 +335,7 @@ ovs_numa_get_unpinned_core_on_numa(int numa_id) struct cpu_core *core; LIST_FOR_EACH(core, list_node, &numa->cores) { - if (core->available && !core->pinned) { + if (core->available_pmd && !core->pinned) { core->pinned = true; return core->core_id; } @@ -390,11 +393,8 @@ ovs_numa_dump_destroy(struct ovs_numa_dump *dump) free(dump); } -/* Reads the cpu mask configuration from 'cmask' and sets the - * 'available' of corresponding cores. For unspecified cores, - * sets 'available' to false. 
*/ -void -ovs_numa_set_cpu_mask(const char *cmask) +static void +ovs_numa_parse_cpu_mask(const char *cmask, void (*cb)(struct cpu_core *, bool)) { int core_id = 0; int i; @@ -403,12 +403,12 @@ return; } - /* If no mask specified, resets the 'available' to true for all cores. */ + /* If no mask specified, defaults to all cores being available. */ if (!cmask) { struct cpu_core *core; HMAP_FOR_EACH(core, hmap_node, &all_cpu_cores) { - core->available = true; + cb(core, true); } return; @@ -433,7 +433,7 @@ core = CONTAINER_OF(hmap_first_with_hash(&all_cpu_cores, hash_int(core_id++, 0)), struct cpu_core, hmap_node); - core->available = (bin >> j) & 0x1; + cb(core, (bin >> j) & 0x1); if (core_id >= hmap_count(&all_cpu_cores)) { return; @@ -448,8 +448,57 @@ core = CONTAINER_OF(hmap_first_with_hash(&all_cpu_cores, hash_int(core_id++, 0)), struct cpu_core, hmap_node); - core->available = false; + cb(core, false); + } } + +static void +available_pmd_cb(struct cpu_core *core, bool available) +{ + core->available_pmd = available; +} + +/* Reads the cpu mask configuration from 'cmask_pmd' and sets the + * 'available_pmd' flag of the corresponding cores. For unspecified cores, + * sets 'available_pmd' to false. */ +void +ovs_numa_set_cpu_mask_pmd(const char *cmask_pmd) +{ + ovs_numa_parse_cpu_mask(cmask_pmd, available_pmd_cb); +} + +static void +available_nonpmd_cb(struct cpu_core *core, bool available) +{ + core->available_nonpmd = available; +} + +/* Reads the cpu mask configuration from 'cmask_nonpmd' and sets the + * 'available_nonpmd' flag of the corresponding cores. For unspecified cores, + * sets 'available_nonpmd' to false. */ +void +ovs_numa_set_cpu_mask_nonpmd(const char *cmask_nonpmd) +{ + ovs_numa_parse_cpu_mask(cmask_nonpmd, available_nonpmd_cb); +} + +/* Fills 'cpuset' with the cores on which non-pmd threads may run: the cores + * enabled by the nonpmd cpu mask that are not currently pinned by any pmd + * thread. Returns true if 'cpuset' has been filled, false otherwise. 
*/ +bool +ovs_numa_get_non_pmd_free_set(size_t cpusetsize, cpu_set_t *cpuset) +{ + struct cpu_core *core; + + ovs_assert(cpusetsize == sizeof *cpuset); + + CPU_ZERO(cpuset); + HMAP_FOR_EACH(core, hmap_node, &all_cpu_cores) { + if (!core->pinned && core->available_nonpmd) { + CPU_SET(core->core_id, cpuset); + } } + return true; } #endif /* __linux__ */ diff --git a/lib/ovs-numa.h b/lib/ovs-numa.h index 35b351b..04f598a 100644 --- a/lib/ovs-numa.h +++ b/lib/ovs-numa.h @@ -22,6 +22,7 @@ #include "compiler.h" #include "list.h" +#include "ovs-thread.h" #define OVS_CORE_UNSPEC INT_MAX #define OVS_NUMA_UNSPEC INT_MAX @@ -45,7 +46,8 @@ bool ovs_numa_numa_id_is_valid(int numa_id); bool ovs_numa_core_id_is_valid(int core_id); bool ovs_numa_core_is_pinned(int core_id); int ovs_numa_get_n_numas(void); -void ovs_numa_set_cpu_mask(const char *cmask); +void ovs_numa_set_cpu_mask_pmd(const char *cmask); +void ovs_numa_set_cpu_mask_nonpmd(const char *cmask); int ovs_numa_get_n_cores(void); int ovs_numa_get_numa_id(int core_id); int ovs_numa_get_n_cores_on_numa(int numa_id); @@ -56,6 +58,7 @@ int ovs_numa_get_unpinned_core_on_numa(int numa_id); void ovs_numa_unpin_core(int core_id); struct ovs_numa_dump *ovs_numa_dump_cores_on_numa(int numa_id); void ovs_numa_dump_destroy(struct ovs_numa_dump *); +bool ovs_numa_get_non_pmd_free_set(size_t, cpu_set_t *); #define FOR_EACH_CORE_ON_NUMA(ITER, DUMP) \ LIST_FOR_EACH((ITER), list_node, &(DUMP)->dump) @@ -87,7 +90,13 @@ ovs_numa_core_is_pinned(int core_id OVS_UNUSED) } static inline void -ovs_numa_set_cpu_mask(const char *cmask OVS_UNUSED) +ovs_numa_set_cpu_mask_pmd(const char *cmask OVS_UNUSED) +{ + /* Nothing */ +} + +static inline void +ovs_numa_set_cpu_mask_nonpmd(const char *cmask OVS_UNUSED) { /* Nothing */ } @@ -158,9 +167,15 @@ ovs_numa_dump_destroy(struct ovs_numa_dump *dump OVS_UNUSED) /* Nothing */ } +static inline bool +ovs_numa_get_non_pmd_free_set(size_t s OVS_UNUSED, cpu_set_t *c OVS_UNUSED) +{ + return false; +} + /* No loop. */ #define FOR_EACH_CORE_ON_NUMA(ITER, DUMP) \ for ((ITER) = NULL; (ITER);) #endif /* __linux__ */ -#endif /* ovs-thead.h */ +#endif /* ovs-numa.h */ diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 9b67518..0a90f9e 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -569,7 +569,8 @@ type_run(const char *type) udpif_set_threads(backer->udpif, n_handlers, n_revalidators); } - dpif_poll_threads_set(backer->dpif, n_dpdk_rxqs, pmd_cpu_mask); + dpif_poll_threads_set(backer->dpif, n_dpdk_rxqs, pmd_cpu_mask, + nonpmd_cpu_mask); if (backer->need_revalidate) { struct ofproto_dpif *ofproto; diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h index 7208541..856491d 100644 --- a/ofproto/ofproto-provider.h +++ b/ofproto/ofproto-provider.h @@ -459,6 +459,7 @@ extern size_t n_dpdk_rxqs; /* Cpu mask for pmd threads. */ extern char *pmd_cpu_mask; +extern char *nonpmd_cpu_mask; static inline struct rule *rule_from_cls_rule(const struct cls_rule *); diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index 07a1f5d..ecb7bb8 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -306,6 +306,7 @@ unsigned ofproto_max_idle = OFPROTO_MAX_IDLE_DEFAULT; size_t n_handlers, n_revalidators; size_t n_dpdk_rxqs; char *pmd_cpu_mask; +char *nonpmd_cpu_mask; /* Map from datapath name to struct ofproto, for use by unixctl commands. 
*/ static struct hmap all_ofprotos = HMAP_INITIALIZER(&all_ofprotos); @@ -741,7 +742,7 @@ ofproto_set_n_dpdk_rxqs(int n_rxqs) } void -ofproto_set_cpu_mask(const char *cmask) +ofproto_set_pmd_cpu_mask(const char *cmask) { free(pmd_cpu_mask); @@ -749,6 +750,14 @@ } void +ofproto_set_nonpmd_cpu_mask(const char *cmask) +{ + free(nonpmd_cpu_mask); + + nonpmd_cpu_mask = cmask ? xstrdup(cmask) : NULL; +} + +void ofproto_set_threads(int n_handlers_, int n_revalidators_) { int threads = MAX(count_cpu_cores(), 2); diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h index 7dc1874..bc7359b 100644 --- a/ofproto/ofproto.h +++ b/ofproto/ofproto.h @@ -317,7 +317,8 @@ int ofproto_port_set_mcast_snooping(struct ofproto *ofproto, void *aux, const struct ofproto_mcast_snooping_port_settings *s); void ofproto_set_threads(int n_handlers, int n_revalidators); void ofproto_set_n_dpdk_rxqs(int n_rxqs); -void ofproto_set_cpu_mask(const char *cmask); +void ofproto_set_pmd_cpu_mask(const char *cmask); +void ofproto_set_nonpmd_cpu_mask(const char *cmask); void ofproto_set_dp_desc(struct ofproto *, const char *dp_desc); int ofproto_set_snoops(struct ofproto *, const struct sset *snoops); int ofproto_set_netflow(struct ofproto *, diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index dd622dc..1bbd6af 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -565,7 +565,10 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg) OFPROTO_MAX_IDLE_DEFAULT)); ofproto_set_n_dpdk_rxqs(smap_get_int(&ovs_cfg->other_config, "n-dpdk-rxqs", 0)); - ofproto_set_cpu_mask(smap_get(&ovs_cfg->other_config, "pmd-cpu-mask")); + ofproto_set_pmd_cpu_mask(smap_get(&ovs_cfg->other_config, + "pmd-cpu-mask")); + ofproto_set_nonpmd_cpu_mask(smap_get(&ovs_cfg->other_config, + "nonpmd-cpu-mask")); ofproto_set_threads( smap_get_int(&ovs_cfg->other_config, "n-handler-threads", 0), diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index e346119..5d14487 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -180,6 +180,26 @@ </p> </column> + <column name="other_config" key="nonpmd-cpu-mask"> + <p> + Specifies the CPU mask used to set the CPU affinity of non-PMD threads + in OVS. The value should be a hex string, similar to the + dpdk EAL '-c COREMASK' option input or the 'taskset' mask input. + This key is effective only when PMD threads are used (i.e. when the + userspace datapath is used with DPDK devices). + </p> + <p> + The lowest order bit corresponds to the first CPU core. A set bit + means the corresponding core is available. All non-PMD threads + will be pinned to the set of cores specified by this option, minus + any cores used by PMD threads. + </p> + <p> + If not specified, non-PMD threads will be bound to every core + not used by PMD threads. + </p> + </column> + <column name="other_config" key="n-handler-threads" type='{"type": "integer", "minInteger": 1}'> <p> -- 2.1.4 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev