The --dummy-numa option is used to initialize the ovs_numa module with a fake configuration and to avoid pthread_setaffinity_np() calls. It is useful for testing dpif-netdev with pmd threads.
Since it is only used for testing it is not documented in the man pages. Signed-off-by: Daniele Di Proietto <diproiet...@vmware.com> --- lib/ovs-numa.c | 166 ++++++++++++++++++++++++++++++++++++++++-------- lib/ovs-numa.h | 1 + vswitchd/ovs-vswitchd.c | 6 ++ 3 files changed, 147 insertions(+), 26 deletions(-) diff --git a/lib/ovs-numa.c b/lib/ovs-numa.c index 7bb2bcc..cba08d0 100644 --- a/lib/ovs-numa.c +++ b/lib/ovs-numa.c @@ -80,6 +80,11 @@ static struct hmap all_numa_nodes = HMAP_INITIALIZER(&all_numa_nodes); static struct hmap all_cpu_cores = HMAP_INITIALIZER(&all_cpu_cores); /* True if numa node and core info are correctly extracted. */ static bool found_numa_and_core; +/* True if the module was initialized with dummy options. In this case, the + * module must not interact with the actual cpus/nodes in the system. */ +static bool dummy_numa = false; + +static struct numa_node* get_numa_by_numa_id(int numa_id); #ifdef __linux__ /* Returns true if 'str' contains all digits. Returns false otherwise. */ @@ -90,13 +95,89 @@ contain_all_digits(const char *str) } #endif /* __linux__ */ +static struct numa_node * +insert_new_numa_node(int numa_id) +{ + struct numa_node *n = xzalloc(sizeof *n); + + hmap_insert(&all_numa_nodes, &n->hmap_node, hash_int(numa_id, 0)); + ovs_list_init(&n->cores); + n->numa_id = numa_id; + + return n; +} + +static struct cpu_core * +insert_new_cpu_core(struct numa_node *n, unsigned core_id) +{ + struct cpu_core *c = xzalloc(sizeof *c); + + hmap_insert(&all_cpu_cores, &c->hmap_node, hash_int(core_id, 0)); + ovs_list_insert(&n->cores, &c->list_node); + c->core_id = core_id; + c->numa = n; + c->available = true; + + return c; +} + +/* Has the same effect as discover_numa_and_core(), but instead of reading + * sysfs entries, extracts the info from 'dummy_config'. + * + * 'dummy_config' lists the numa_ids of each CPU separated by a comma, e.g. + * - "0,0,0,0": four cores on numa socket 0. 
+ * - "0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1": 16 cores on two numa sockets. + * - "0,0,0,0,1,1,1,1": 8 cores on two numa sockets. + * + * The different numa ids must be consecutives or the function will abort. */ +static void +discover_numa_and_core_dummy(const char *dummy_config) +{ + char *conf = xstrdup(dummy_config); + char *id, *saveptr = NULL; + unsigned i = 0; + long max_numa_id = 0; + + for (id = strtok_r(conf, ",", &saveptr); id; + id = strtok_r(NULL, ",", &saveptr)) { + struct hmap_node *hnode; + struct numa_node *n; + long numa_id; + + numa_id = strtol(id, NULL, 10); + if (numa_id < 0 || numa_id >= MAX_NUMA_NODES) { + VLOG_WARN("Invalid numa node %ld", numa_id); + continue; + } + + max_numa_id = MAX(max_numa_id, numa_id); + + hnode = hmap_first_with_hash(&all_numa_nodes, hash_int(numa_id, 0)); + + if (hnode) { + n = CONTAINER_OF(hnode, struct numa_node, hmap_node); + } else { + n = insert_new_numa_node(numa_id); + } + + insert_new_cpu_core(n, i); + + i++; + } + + free(conf); + + if (max_numa_id + 1 != hmap_count(&all_numa_nodes)) { + ovs_fatal(0, "dummy numa contains non consecutive numa ids"); + } +} + /* Discovers all numa nodes and the corresponding cpu cores. * Constructs the 'struct numa_node' and 'struct cpu_core'. */ static void discover_numa_and_core(void) { #ifdef __linux__ - int n_cpus = 0; int i; DIR *dir; bool numa_supported = true; @@ -125,31 +206,20 @@ discover_numa_and_core(void) /* Creates 'struct numa_node' if the 'dir' is non-null. 
*/ if (dir) { - struct numa_node *n = xzalloc(sizeof *n); + struct numa_node *n; struct dirent *subdir; - hmap_insert(&all_numa_nodes, &n->hmap_node, hash_int(i, 0)); - ovs_list_init(&n->cores); - n->numa_id = i; + n = insert_new_numa_node(i); while ((subdir = readdir(dir)) != NULL) { if (!strncmp(subdir->d_name, "cpu", 3) - && contain_all_digits(subdir->d_name + 3)){ - struct cpu_core *c = xzalloc(sizeof *c); + && contain_all_digits(subdir->d_name + 3)) { unsigned core_id; core_id = strtoul(subdir->d_name + 3, NULL, 10); - hmap_insert(&all_cpu_cores, &c->hmap_node, - hash_int(core_id, 0)); - ovs_list_insert(&n->cores, &c->list_node); - c->core_id = core_id; - c->numa = n; - c->available = true; - n_cpus++; + insert_new_cpu_core(n, core_id); } } - VLOG_INFO("Discovered %"PRIuSIZE" CPU cores on NUMA node %d", - ovs_list_size(&n->cores), n->numa_id); closedir(dir); } else if (errno != ENOENT) { VLOG_WARN("opendir(%s) failed (%s)", path, @@ -161,12 +231,6 @@ discover_numa_and_core(void) break; } } - - VLOG_INFO("Discovered %"PRIuSIZE" NUMA nodes and %d CPU cores", - hmap_count(&all_numa_nodes), n_cpus); - if (hmap_count(&all_numa_nodes) && hmap_count(&all_cpu_cores)) { - found_numa_and_core = true; - } #endif /* __linux__ */ } @@ -201,18 +265,63 @@ get_numa_by_numa_id(int numa_id) } -/* Extracts the numa node and core info from the 'sysfs'. 
*/ -void -ovs_numa_init(void) + +static bool +ovs_numa_init__(const char *dummy_config) { static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; if (ovsthread_once_start(&once)) { - discover_numa_and_core(); + const struct numa_node *n; + + if (!dummy_config) { + discover_numa_and_core(); + } else { + discover_numa_and_core_dummy(dummy_config); + } + + HMAP_FOR_EACH(n, hmap_node, &all_numa_nodes) { + VLOG_INFO("Discovered %"PRIuSIZE" CPU cores on NUMA node %d", + ovs_list_size(&n->cores), n->numa_id); + } + + VLOG_INFO("Discovered %"PRIuSIZE" NUMA nodes and %"PRIuSIZE" CPU cores", + hmap_count(&all_numa_nodes), hmap_count(&all_cpu_cores)); + + if (hmap_count(&all_numa_nodes) && hmap_count(&all_cpu_cores)) { + found_numa_and_core = true; + } + ovsthread_once_done(&once); + + return true; + } else { + return false; } } +/* Extracts the numa node and core info from the 'dummy_config'. This is + * useful for testing purposes. The function must be called only once, + * before ovs_numa_init(). Every subsequent call to ovs_numa_init() will + * have no effect. + * + * The format of 'dummy_config' is explained in the comment above + * discover_numa_and_core_dummy().*/ +void +ovs_numa_init_dummy(const char *dummy_config) +{ + dummy_numa = true; + ovs_assert(dummy_config); + ovs_assert(ovs_numa_init__(dummy_config)); +} + +/* Extracts the numa node and core info from sysfs. 
*/ +void +ovs_numa_init(void) +{ + ovs_numa_init__(NULL); +} + bool ovs_numa_numa_id_is_valid(int numa_id) { @@ -476,6 +585,11 @@ ovs_numa_set_cpu_mask(const char *cmask) int ovs_numa_thread_setaffinity_core(unsigned core_id) { + if (dummy_numa) { + /* Nothing to do */ + return 0; + } + #ifdef __linux__ cpu_set_t cpuset; int err; diff --git a/lib/ovs-numa.h b/lib/ovs-numa.h index 5b3444b..9a434df 100644 --- a/lib/ovs-numa.h +++ b/lib/ovs-numa.h @@ -39,6 +39,7 @@ struct ovs_numa_info { }; void ovs_numa_init(void); +void ovs_numa_init_dummy(const char *dummy_config); bool ovs_numa_numa_id_is_valid(int numa_id); bool ovs_numa_core_id_is_valid(unsigned core_id); bool ovs_numa_core_is_pinned(unsigned core_id); diff --git a/vswitchd/ovs-vswitchd.c b/vswitchd/ovs-vswitchd.c index 7d467a1..12a3db1 100644 --- a/vswitchd/ovs-vswitchd.c +++ b/vswitchd/ovs-vswitchd.c @@ -145,6 +145,7 @@ parse_options(int argc, char *argv[], char **unixctl_pathp) OPT_DISABLE_SYSTEM, DAEMON_OPTION_ENUMS, OPT_DPDK, + OPT_DUMMY_NUMA, }; static const struct option long_options[] = { {"help", no_argument, NULL, 'h'}, @@ -159,6 +160,7 @@ parse_options(int argc, char *argv[], char **unixctl_pathp) {"enable-dummy", optional_argument, NULL, OPT_ENABLE_DUMMY}, {"disable-system", no_argument, NULL, OPT_DISABLE_SYSTEM}, {"dpdk", optional_argument, NULL, OPT_DPDK}, + {"dummy-numa", required_argument, NULL, OPT_DUMMY_NUMA}, {NULL, 0, NULL, 0}, }; char *short_options = ovs_cmdl_long_options_to_short_options(long_options); @@ -214,6 +216,10 @@ parse_options(int argc, char *argv[], char **unixctl_pathp) ovs_fatal(0, "Using --dpdk to configure DPDK is not supported."); break; + case OPT_DUMMY_NUMA: + ovs_numa_init_dummy(optarg); + break; + default: abort(); } -- 2.8.1 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev