Thu, Jan 17, 2019 at 10:59:11PM CET, era...@mellanox.com wrote: >Devlink health reporter is an instance for reporting, diagnosing and >recovering from run time errors discovered by the reporters. >Define it's data structure and supported operations. >In addition, expose devlink API to create and destroy a reporter. >Each devlink instance will hold it's own reporters list. > >As part of the allocation, driver shall provide a set of callbacks which >will be used the devlink in order to handle health reports and user >commands related to this reporter. In addition, driver is entitled to >provide some priv pointer, which can be fetched from the reporter by >devlink_health_reporter_priv function. > >For each reporter, devlink will hold a metadata of statistics, >buffers and status. > >Signed-off-by: Eran Ben Elisha <era...@mellanox.com> >Reviewed-by: Moshe Shemesh <mo...@mellanox.com> >--- > include/net/devlink.h | 59 ++++++++++++++++++++ > net/core/devlink.c | 127 ++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 186 insertions(+) > >diff --git a/include/net/devlink.h b/include/net/devlink.h >index 77c77319290a..7fe30d67678a 100644 >--- a/include/net/devlink.h >+++ b/include/net/devlink.h >@@ -30,6 +30,7 @@ struct devlink { > struct list_head param_list; > struct list_head region_list; > u32 snapshot_id; >+ struct list_head reporter_list; > struct devlink_dpipe_headers *dpipe_headers; > const struct devlink_ops *ops; > struct device *dev; >@@ -424,6 +425,34 @@ struct devlink_region; > typedef void devlink_snapshot_data_dest_t(const void *data); > > struct devlink_health_buffer; >+struct devlink_health_reporter; >+ >+/** >+ * struct devlink_health_reporter_ops - Reporter operations >+ * @name: reporter name >+ * dump_size: dump buffer size allocated by the devlink >+ * diagnose_size: diagnose buffer size allocated by the devlink >+ * recover: callback to recover from reported error >+ * if priv_ctx is NULL, run a full recover >+ * dump: callback to dump an object >+ * if priv_ctx is NULL, run a full dump >+ * diagnose: callback to diagnose the current status >+ */ >+ >+struct devlink_health_reporter_ops { >+ char *name; >+ unsigned int dump_size; >+ unsigned int diagnose_size; >+ int (*recover)(struct devlink_health_reporter *reporter, >+ void *priv_ctx); >+ int (*dump)(struct devlink_health_reporter *reporter, >+ struct devlink_health_buffer **buffers_array, >+ unsigned int buffer_size, unsigned int num_buffers, >+ void *priv_ctx); >+ int (*diagnose)(struct devlink_health_reporter *reporter, >+ struct devlink_health_buffer **buffers_array, >+ unsigned int buffer_size, unsigned int num_buffers); >+}; > > struct devlink_ops { > int (*reload)(struct devlink *devlink, struct netlink_ext_ack *extack); >@@ -602,6 +631,16 @@ int devlink_health_buffer_put_value_string(struct >devlink_health_buffer *buffer, > char *name); > int devlink_health_buffer_put_value_data(struct devlink_health_buffer *buffer, > void *data, int len); >+struct devlink_health_reporter * >+devlink_health_reporter_create(struct devlink *devlink, >+ const struct devlink_health_reporter_ops *ops, >+ u64 graceful_period, bool auto_recover, >+ void *priv); >+void >+devlink_health_reporter_destroy(struct devlink_health_reporter *reporter); >+ >+void * >+devlink_health_reporter_priv(struct devlink_health_reporter *reporter); > #else > > static inline struct devlink *devlink_alloc(const struct devlink_ops *ops, >@@ -920,6 +959,26 @@ devlink_health_buffer_put_value_data(struct >devlink_health_buffer *buffer, > { > return 0; > } >+ >+static inline struct devlink_health_reporter * >+devlink_health_reporter_create(struct devlink *devlink, >+ const struct devlink_health_reporter_ops *ops, >+ u64 graceful_period, bool auto_recover, >+ void *priv) >+{ >+ return NULL; >+} >+ >+static inline void >+devlink_health_reporter_destroy(struct devlink_health_reporter *reporter) >+{ >+} >+ >+static inline void * >+devlink_health_reporter_priv(struct devlink_health_reporter *reporter) >+{ >+ return NULL; >+} > #endif > > #endif /* _NET_DEVLINK_H_ */ >diff --git a/net/core/devlink.c b/net/core/devlink.c >index 8984501edade..fec169a28dba 100644 >--- a/net/core/devlink.c >+++ b/net/core/devlink.c >@@ -4098,6 +4098,132 @@ devlink_health_buffer_snd(struct genl_info *info, > return err; > } > >+struct devlink_health_reporter { >+ struct list_head list; >+ struct devlink_health_buffer **dump_buffers_array; >+ struct mutex dump_lock; /* lock parallel read/write from dump buffers */ >+ struct devlink_health_buffer **diagnose_buffers_array; >+ struct mutex diagnose_lock; /* lock parallel read/write from diagnose >buffers */ >+ void *priv; >+ const struct devlink_health_reporter_ops *ops; >+ struct devlink *devlink; >+ u64 graceful_period; >+ bool auto_recover; >+ u8 health_state; >+}; >+ >+void * >+devlink_health_reporter_priv(struct devlink_health_reporter *reporter) >+{ >+ return reporter->priv; >+} >+EXPORT_SYMBOL_GPL(devlink_health_reporter_priv); >+ >+static struct devlink_health_reporter * >+devlink_health_reporter_find_by_name(struct devlink *devlink, >+ const char *reporter_name) >+{ >+ struct devlink_health_reporter *reporter; >+ >+ list_for_each_entry(reporter, &devlink->reporter_list, list) >+ if (!strcmp(reporter->ops->name, reporter_name)) >+ return reporter; >+ return NULL; >+} >+ >+/** >+ * devlink_health_reporter_create - create devlink health reporter >+ * >+ * @devlink: devlink >+ * @ops: ops >+ * @graceful_period: to avoid recovery loops, in msecs >+ * @auto_recover: auto recover when error occurs >+ * @priv: priv >+ */ >+struct devlink_health_reporter * >+devlink_health_reporter_create(struct devlink *devlink, >+ const struct devlink_health_reporter_ops *ops, >+ u64 graceful_period, bool auto_recover, >+ void *priv) >+{ >+ struct devlink_health_reporter *reporter; >+ >+ mutex_lock(&devlink->lock); >+ if (devlink_health_reporter_find_by_name(devlink, ops->name)) { >+ reporter = ERR_PTR(-EEXIST); >+ goto unlock; >+ } >+ >+ if (WARN_ON(ops->dump && !ops->dump_size) || >+ WARN_ON(ops->diagnose && !ops->diagnose_size) || >+ WARN_ON(auto_recover && !ops->recover) || >+ WARN_ON(graceful_period && !ops->recover)) { >+ reporter = ERR_PTR(-EINVAL); >+ goto unlock; >+ } >+ >+ reporter = kzalloc(sizeof(*reporter), GFP_KERNEL); >+ if (!reporter) { >+ reporter = ERR_PTR(-ENOMEM); >+ goto unlock; >+ } >+ >+ if (ops->dump) { >+ reporter->dump_buffers_array = >+ devlink_health_buffers_create(ops->dump_size); >+ if (!reporter->dump_buffers_array) { >+ kfree(reporter); >+ reporter = ERR_PTR(-ENOMEM); >+ goto unlock; >+ } >+ } >+ >+ if (ops->diagnose) { >+ reporter->diagnose_buffers_array = >+ devlink_health_buffers_create(ops->diagnose_size); >+ if (!reporter->diagnose_buffers_array) { >+ >devlink_health_buffers_destroy(reporter->dump_buffers_array, >+ >DEVLINK_HEALTH_SIZE_TO_BUFFERS(ops->dump_size));
This is just ugly. :/ As I wrote in the other email, should be converted to simple "msg_ctx = msg_ctx_create(), msg_ctx_destroy(msg_ctx)", no sizes, no buffers visible to the driver. >+ kfree(reporter); >+ reporter = ERR_PTR(-ENOMEM); >+ goto unlock; >+ } >+ } >+ >+ list_add_tail(&reporter->list, &devlink->reporter_list); >+ mutex_init(&reporter->dump_lock); >+ mutex_init(&reporter->diagnose_lock); >+ >+ reporter->priv = priv; >+ reporter->ops = ops; >+ reporter->devlink = devlink; >+ reporter->graceful_period = graceful_period; >+ reporter->auto_recover = auto_recover; >+unlock: >+ mutex_unlock(&devlink->lock); >+ return reporter; >+} >+EXPORT_SYMBOL_GPL(devlink_health_reporter_create); >+ >+/** >+ * devlink_health_reporter_destroy - destroy devlink health reporter >+ * >+ * @reporter: devlink health reporter to destroy >+ */ >+void >+devlink_health_reporter_destroy(struct devlink_health_reporter *reporter) >+{ >+ mutex_lock(&reporter->devlink->lock); >+ list_del(&reporter->list); >+ devlink_health_buffers_destroy(reporter->dump_buffers_array, >+ >DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->dump_size)); >+ devlink_health_buffers_destroy(reporter->diagnose_buffers_array, >+ >DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->diagnose_size)); >+ kfree(reporter); >+ mutex_unlock(&reporter->devlink->lock); >+} >+EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy); >+ > static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { > [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING }, > [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING }, >@@ -4383,6 +4509,7 @@ struct devlink *devlink_alloc(const struct devlink_ops >*ops, size_t priv_size) > INIT_LIST_HEAD(&devlink->resource_list); > INIT_LIST_HEAD(&devlink->param_list); > INIT_LIST_HEAD(&devlink->region_list); >+ INIT_LIST_HEAD(&devlink->reporter_list); > mutex_init(&devlink->lock); > return devlink; > } >-- >2.17.1 >