Hi Aaron, thanks (again!) for this patch. Few comments:
* As I mentioned on my previous round of review, I don't think it's necessary to pass the whole Open_vSwitch table to dpdk_init(). I.e., instead of doing dpdk_init(&cfg); I'd prefer if (cfg) { dpdk_init(&cfg->other_config); } This way we don't have to include "vswitch-idl.h". * I suggested 'dpdk-mem-channels', because I think people who want to use that will be comfortable passing '-n' in dpdk-extra. What do you think? Is there any reason why you think it's worth keeping? * Sorry for not noticing this before: it seems that netdev_dpdk_register() now always registers the netdev classes, even though they cannot be created. The registered classes end up in the database in the iface_type column of the Open_vSwitch table, so controllers might think that they're available. I think we should register the classes only when DPDK is initialized. Two minor nits inline, Thanks On 26/04/2016 12:42, "Aaron Conole" <acon...@redhat.com> wrote: >Existing DPDK integration is provided by use of command line options which >must be split out and passed to librte in a special manner. However, this >forces any configuration to be passed by way of a special DPDK flag, and >interferes with ovs+dpdk packaging solutions. > >This commit delays dpdk initialization until after the OVS database >connection is established, at which point ovs initializes librte. It >pulls all of the config data from the OVS database, and assembles a >new argv/argc pair to be passed along. > >Signed-off-by: Aaron Conole <acon...@redhat.com> >Acked-by: Kevin Traynor <kevin.tray...@intel.com> >--- >Previous: http://openvswitch.org/pipermail/dev/2016-April/069028.html > >v12: >* Squashed NEWS change into this commit > > FAQ.md | 6 +- > INSTALL.DPDK.md | 82 +++++++++--- > NEWS | 5 + > lib/automake.mk | 4 + > lib/netdev-dpdk.c | 304 +++++++++++++++++++++++++++++++++------------ > lib/netdev-dpdk.h | 14 +-- > lib/netdev-nodpdk.c | 21 ++++ > tests/ofproto-macros.at | 3 +- > utilities/ovs-dev.py | 13 +- > vswitchd/bridge.c | 3 + > vswitchd/ovs-vswitchd.8.in | 6 +- > vswitchd/ovs-vswitchd.c | 27 +--- > vswitchd/vswitch.xml | 120 ++++++++++++++++++ > 13 files changed, 463 insertions(+), 145 deletions(-) > create mode 100644 lib/netdev-nodpdk.c > [...] > >@@ -2732,22 +2734,20 @@ static const struct dpdk_qos_ops egress_policer_ops = { > > static int > process_vhost_flags(char *flag, char *default_val, int size, >- char **argv, char **new_val) >+ const struct ovsrec_open_vswitch *ovs_cfg, >+ char **new_val) > { >+ const char *val; > int changed = 0; > >+ val = smap_get(&ovs_cfg->other_config, flag); >+ > /* Depending on which version of vhost is in use, process the > vhost-specific >- * flag if it is provided on the vswitchd command line, otherwise resort >to >- * a default value. >- * >- * For vhost-user: Process "-vhost_sock_dir" to set the custom location of >- * the vhost-user socket(s). >- * For vhost-cuse: Process "-cuse_dev_name" to set the custom name of the >- * vhost-cuse character device. >+ * flag if it is provided, otherwise resort to default value. > */ >- if (!strcmp(argv[1], flag) && (strlen(argv[2]) <= size)) { >+ if (val && (strlen(val) <= size)) { > changed = 1; >- *new_val = xstrdup(argv[2]); >+ *new_val = xstrdup(val); > VLOG_INFO("User-provided %s in use: %s", flag, *new_val); > } else { > VLOG_INFO("No %s provided - defaulting to %s", flag, default_val); >@@ -2757,68 +2757,186 @@ process_vhost_flags(char *flag, char *default_val, >int size, > return changed; > } > >-int >-dpdk_init(int argc, char **argv) >+static char ** >+grow_argv(char ***argv, size_t cur_siz, size_t grow_by) > { >- int result; >- int base = 0; >- char *pragram_name = argv[0]; >- int err; >- int isset; >- cpu_set_t cpuset; >+ return xrealloc(*argv, sizeof(char *) * (cur_siz + grow_by)); >+} > >- if (argc < 2 || strcmp(argv[1], "--dpdk")) >- return 0; >+static void >+dpdk_option_extend(char ***argv, int argc, const char *option, >+ const char *value) >+{ >+ char **newargv = grow_argv(argv, argc, 2); >+ *argv = newargv; >+ newargv[argc] = xstrdup(option); >+ newargv[argc+1] = xstrdup(value); >+} > >- /* Remove the --dpdk argument from arg list.*/ >- argc--; >- argv++; >+static int >+construct_dpdk_options(const struct ovsrec_open_vswitch *ovs_cfg, >+ char ***argv, const int initial_size) >+{ >+ struct dpdk_options_map { >+ const char *ovs_configuration; >+ const char *dpdk_option; >+ bool default_enabled; >+ const char *default_value; >+ } opts[] = { >+ {"dpdk-lcore-mask", "-c", false, NULL}, >+ {"dpdk-mem-channels", "-n", false, NULL}, >+ {"dpdk-hugepage-dir", "--huge-dir", false, NULL}, >+ }; >+ >+ int i, ret = initial_size; >+ >+ /*First, construct from the flat-options (non-mutex)*/ >+ for (i = 0; i < ARRAY_SIZE(opts); ++i) { >+ const char *lookup = smap_get(&ovs_cfg->other_config, >+ opts[i].ovs_configuration); >+ if (!lookup && opts[i].default_enabled) { >+ lookup = opts[i].default_value; >+ } > >- /* Reject --user option */ >- int i; >- for (i = 0; i < argc; i++) { >- if (!strcmp(argv[i], "--user")) { >- VLOG_ERR("Can not mix --dpdk and --user options, aborting."); >+ if (lookup) { >+ dpdk_option_extend(argv, ret, opts[i].dpdk_option, lookup); >+ ret += 2; > } > } > >+ return ret; >+} >+ >+#define MAX_DPDK_EXCL_OPTS 10 >+ >+static int >+construct_dpdk_mutex_options(const struct ovsrec_open_vswitch *ovs_cfg, >+ char ***argv, const int initial_size) >+{ >+ struct dpdk_exclusive_options_map { >+ const char *category; >+ const char *ovs_dpdk_options[MAX_DPDK_EXCL_OPTS]; >+ const char *eal_dpdk_options[MAX_DPDK_EXCL_OPTS]; >+ const char *default_value; >+ int default_option; >+ } excl_opts[] = { >+ {"memory type", >+ {"dpdk-alloc-mem", "dpdk-socket-mem", NULL,}, >+ {"-m", "--socket-mem", NULL,}, >+ "1024,0", 1 >+ }, >+ }; >+ >+ int i, ret = initial_size; >+ for (i = 0; i < ARRAY_SIZE(excl_opts); ++i) { >+ int found_opts = 0, scan, found_pos = -1; >+ const char *found_value; >+ struct dpdk_exclusive_options_map *popt = &excl_opts[i]; >+ >+ for (scan = 0; scan < MAX_DPDK_EXCL_OPTS >+ && popt->ovs_dpdk_options[scan]; ++scan) { >+ const char *lookup = smap_get(&ovs_cfg->other_config, >+ popt->ovs_dpdk_options[scan]); >+ if (lookup && strlen(lookup)) { >+ found_opts++; >+ found_pos = scan; >+ found_value = lookup; >+ } >+ } >+ >+ if (!found_opts) { >+ if (popt->default_option) { >+ found_pos = popt->default_option; >+ found_value = popt->default_value; >+ } else { >+ continue; >+ } >+ } >+ >+ if (found_opts > 1) { >+ VLOG_ERR("Multiple defined options for %s. Please check your" >+ " database settings and reconfigure if necessary.", >+ popt->category); >+ } >+ >+ dpdk_option_extend(argv, ret, popt->eal_dpdk_options[found_pos], >+ found_value); >+ ret += 2; >+ } >+ >+ return ret; >+} >+ >+static int >+get_dpdk_args(const struct ovsrec_open_vswitch *ovs_cfg, char ***argv) >+{ >+ int i = construct_dpdk_options(ovs_cfg, argv, 1); >+ i = construct_dpdk_mutex_options(ovs_cfg, argv, i); >+ return i; >+} >+ >+static char **dpdk_argv; >+static int dpdk_argc; >+ >+static void >+deferred_argv_release(void) >+{ >+ int result; >+ for (result = 0; result < dpdk_argc; ++result) { >+ free(dpdk_argv[result]); >+ } >+ >+ free(dpdk_argv); >+} >+ >+static void >+dpdk_init__(const struct ovsrec_open_vswitch *ovs_cfg) >+{ >+ char **argv = NULL; >+ int result; >+ int argc; >+ int err; >+ cpu_set_t cpuset; >+ >+ if (!smap_get_bool(&ovs_cfg->other_config, "dpdk-init", false)) { >+ VLOG_INFO("DPDK Disabled - to change this requires a restart.\n"); >+ return; >+ } >+ >+ VLOG_INFO("DPDK Enabled, initializing"); >+ > #ifdef VHOST_CUSE >- if (process_vhost_flags("-cuse_dev_name", xstrdup("vhost-net"), >- PATH_MAX, argv, &cuse_dev_name)) { >+ if (process_vhost_flags("cuse-dev-name", xstrdup("vhost-net"), >+ PATH_MAX, ovs_cfg, &cuse_dev_name)) { > #else >- if (process_vhost_flags("-vhost_sock_dir", xstrdup(ovs_rundir()), >- NAME_MAX, argv, &vhost_sock_dir)) { >+ if (process_vhost_flags("vhost-sock-dir", xstrdup(ovs_rundir()), >+ NAME_MAX, ovs_cfg, &vhost_sock_dir)) { > struct stat s; >- int err; > > err = stat(vhost_sock_dir, &s); > if (err) { >- VLOG_ERR("vHostUser socket DIR '%s' does not exist.", >- vhost_sock_dir); >- return err; >+ VLOG_ERR("vhost-user sock directory '%s' does not exist.", >+ vhost_sock_dir); > } > #endif >- /* Remove the vhost flag configuration parameters from the argument >- * list, so that the correct elements are passed to the DPDK >- * initialization function >- */ >- argc -= 2; >- argv += 2; /* Increment by two to bypass the vhost flag arguments >*/ >- base = 2; > } > > /* Get the main thread affinity */ > CPU_ZERO(&cpuset); >- err = pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset); >+ err = pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), >+ &cpuset); > if (err) { > VLOG_ERR("Thread getaffinity error %d.", err); >- return err; > } > >- /* Keep the program name argument as this is needed for call to >- * rte_eal_init() >- */ >- argv[0] = pragram_name; >+ argv = grow_argv(&argv, 0, 1); >+ argv[0] = xstrdup(ovs_get_program_name()); >+ argc = get_dpdk_args(ovs_cfg, &argv); >+ >+ argv = grow_argv(&argv, argc, 1); >+ argv[argc] = 0; sparse complains about this: we should use NULL instead of 0 >+ >+ optind = 1; > > /* Make sure things are initialized ... */ > result = rte_eal_init(argc, argv); >@@ -2827,23 +2945,51 @@ dpdk_init(int argc, char **argv) > } > > /* Set the main thread affinity back to pre rte_eal_init() value */ >- err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset); >- if (err) { >- VLOG_ERR("Thread setaffinity error %d", err); >- return err; >+ if (!err) { >+ err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), >+ &cpuset); >+ if (err) { >+ VLOG_ERR("Thread setaffinity error %d", err); >+ } > } > >+ dpdk_argv = argv; >+ dpdk_argc = argc; >+ >+ atexit(deferred_argv_release); >+ > rte_memzone_dump(stdout); > rte_eal_init_ret = 0; > >- if (argc > result) { >- argv[result] = argv[0]; >- } >- > /* We are called from the main thread here */ > RTE_PER_LCORE(_lcore_id) = NON_PMD_CORE_ID; > >- return result + 1 + base; >+ ovs_thread_create("dpdk_watchdog", dpdk_watchdog, NULL); >+ >+#ifdef VHOST_CUSE >+ /* Register CUSE device to handle IOCTLs. >+ * Unless otherwise specified, cuse_dev_name is set to vhost-net. >+ */ >+ err = rte_vhost_driver_register(cuse_dev_name); >+ >+ if (err != 0) { >+ VLOG_ERR("CUSE device setup failure."); >+ return; >+ } >+#endif >+ >+ dpdk_vhost_class_init(); >+} >+ >+void >+dpdk_init(const struct ovsrec_open_vswitch *ovs_cfg) >+{ >+ static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; >+ >+ if (ovs_cfg && ovsthread_once_start(&once)) { >+ dpdk_init__(ovs_cfg); >+ ovsthread_once_done(&once); >+ } > } > > static const struct netdev_class dpdk_class = >@@ -2907,10 +3053,6 @@ netdev_dpdk_register(void) > { > static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; > >- if (rte_eal_init_ret) { >- return; >- } >- > if (ovsthread_once_start(&once)) { > dpdk_common_init(); > netdev_register_provider(&dpdk_class); >diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h >index 646d3e2..143a609 100644 >--- a/lib/netdev-dpdk.h >+++ b/lib/netdev-dpdk.h >@@ -4,6 +4,7 @@ > #include <config.h> > > struct dp_packet; >+struct ovsrec_open_vswitch; > > #ifdef DPDK_NETDEV > >@@ -22,7 +23,6 @@ struct dp_packet; > > #define NON_PMD_CORE_ID LCORE_ID_ANY > >-int dpdk_init(int argc, char **argv); > void netdev_dpdk_register(void); > void free_dpdk_buf(struct dp_packet *); > int pmd_thread_setaffinity_cpu(unsigned cpu); >@@ -33,15 +33,6 @@ int pmd_thread_setaffinity_cpu(unsigned cpu); > > #include "util.h" > >-static inline int >-dpdk_init(int argc, char **argv) >-{ >- if (argc >= 2 && !strcmp(argv[1], "--dpdk")) { >- ovs_fatal(0, "DPDK support not built into this copy of Open >vSwitch."); >- } >- return 0; >-} >- > static inline void > netdev_dpdk_register(void) > { >@@ -61,4 +52,7 @@ pmd_thread_setaffinity_cpu(unsigned cpu OVS_UNUSED) > } > > #endif /* DPDK_NETDEV */ >+ >+void dpdk_init(const struct ovsrec_open_vswitch *ovs_cfg); >+ > #endif >diff --git a/lib/netdev-nodpdk.c b/lib/netdev-nodpdk.c >new file mode 100644 >index 0000000..28e637a >--- /dev/null >+++ b/lib/netdev-nodpdk.c >@@ -0,0 +1,21 @@ >+#include <config.h> >+#include "netdev-dpdk.h" >+#include "smap.h" >+#include "ovs-thread.h" >+#include "openvswitch/vlog.h" >+#include "vswitch-idl.h" >+ >+VLOG_DEFINE_THIS_MODULE(dpdk); >+ >+void >+dpdk_init(const struct ovsrec_open_vswitch *ovs_cfg OVS_UNUSED) This is used now >+{ >+ static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; >+ >+ if (ovs_cfg && ovsthread_once_start(&once)) { >+ if (smap_get_bool(&ovs_cfg->other_config, "dpdk-init", false)) { >+ VLOG_ERR("DPDK not supported in this copy of Open vSwitch."); >+ } >+ ovsthread_once_done(&once); >+ } >+} _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev