Thu, Jan 17, 2019 at 10:59:11PM CET, era...@mellanox.com wrote:
>Devlink health reporter is an instance for reporting, diagnosing and
>recovering from run time errors discovered by the reporters.
>Define it's data structure and supported operations.
>In addition, expose devlink API to create and destroy a reporter.
>Each devlink instance will hold it's own reporters list.
>
>As part of the allocation, driver shall provide a set of callbacks which
>will be used the devlink in order to handle health reports and user
>commands related to this reporter. In addition, driver is entitled to
>provide some priv pointer, which can be fetched from the reporter by
>devlink_health_reporter_priv function.
>
>For each reporter, devlink will hold a metadata of statistics,
>buffers and status.
>
>Signed-off-by: Eran Ben Elisha <era...@mellanox.com>
>Reviewed-by: Moshe Shemesh <mo...@mellanox.com>
>---
> include/net/devlink.h |  59 ++++++++++++++++++++
> net/core/devlink.c    | 127 ++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 186 insertions(+)
>
>diff --git a/include/net/devlink.h b/include/net/devlink.h
>index 77c77319290a..7fe30d67678a 100644
>--- a/include/net/devlink.h
>+++ b/include/net/devlink.h
>@@ -30,6 +30,7 @@ struct devlink {
>       struct list_head param_list;
>       struct list_head region_list;
>       u32 snapshot_id;
>+      struct list_head reporter_list;
>       struct devlink_dpipe_headers *dpipe_headers;
>       const struct devlink_ops *ops;
>       struct device *dev;
>@@ -424,6 +425,34 @@ struct devlink_region;
> typedef void devlink_snapshot_data_dest_t(const void *data);
> 
> struct devlink_health_buffer;
>+struct devlink_health_reporter;
>+
>+/**
>+ * struct devlink_health_reporter_ops - Reporter operations
>+ * @name: reporter name
>+ * dump_size: dump buffer size allocated by the devlink
>+ * diagnose_size: diagnose buffer size allocated by the devlink
>+ * recover: callback to recover from reported error
>+ *          if priv_ctx is NULL, run a full recover
>+ * dump: callback to dump an object
>+ *       if priv_ctx is NULL, run a full dump
>+ * diagnose: callback to diagnose the current status
>+ */
>+
>+struct devlink_health_reporter_ops {
>+      char *name;
>+      unsigned int dump_size;
>+      unsigned int diagnose_size;
>+      int (*recover)(struct devlink_health_reporter *reporter,
>+                     void *priv_ctx);
>+      int (*dump)(struct devlink_health_reporter *reporter,
>+                  struct devlink_health_buffer **buffers_array,
>+                  unsigned int buffer_size, unsigned int num_buffers,
>+                  void *priv_ctx);
>+      int (*diagnose)(struct devlink_health_reporter *reporter,
>+                      struct devlink_health_buffer **buffers_array,
>+                      unsigned int buffer_size, unsigned int num_buffers);
>+};
> 
> struct devlink_ops {
>       int (*reload)(struct devlink *devlink, struct netlink_ext_ack *extack);
>@@ -602,6 +631,16 @@ int devlink_health_buffer_put_value_string(struct 
>devlink_health_buffer *buffer,
>                                          char *name);
> int devlink_health_buffer_put_value_data(struct devlink_health_buffer *buffer,
>                                        void *data, int len);
>+struct devlink_health_reporter *
>+devlink_health_reporter_create(struct devlink *devlink,
>+                             const struct devlink_health_reporter_ops *ops,
>+                             u64 graceful_period, bool auto_recover,
>+                             void *priv);
>+void
>+devlink_health_reporter_destroy(struct devlink_health_reporter *reporter);
>+
>+void *
>+devlink_health_reporter_priv(struct devlink_health_reporter *reporter);
> #else
> 
> static inline struct devlink *devlink_alloc(const struct devlink_ops *ops,
>@@ -920,6 +959,26 @@ devlink_health_buffer_put_value_data(struct 
>devlink_health_buffer *buffer,
> {
>       return 0;
> }
>+
>+static inline struct devlink_health_reporter *
>+devlink_health_reporter_create(struct devlink *devlink,
>+                             const struct devlink_health_reporter_ops *ops,
>+                             u64 graceful_period, bool auto_recover,
>+                             void *priv)
>+{
>+      return NULL;
>+}
>+
>+static inline void
>+devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
>+{
>+}
>+
>+static inline void *
>+devlink_health_reporter_priv(struct devlink_health_reporter *reporter)
>+{
>+      return NULL;
>+}
> #endif
> 
> #endif /* _NET_DEVLINK_H_ */
>diff --git a/net/core/devlink.c b/net/core/devlink.c
>index 8984501edade..fec169a28dba 100644
>--- a/net/core/devlink.c
>+++ b/net/core/devlink.c
>@@ -4098,6 +4098,132 @@ devlink_health_buffer_snd(struct genl_info *info,
>       return err;
> }
> 
>+struct devlink_health_reporter {
>+      struct list_head list;
>+      struct devlink_health_buffer **dump_buffers_array;
>+      struct mutex dump_lock; /* lock parallel read/write from dump buffers */
>+      struct devlink_health_buffer **diagnose_buffers_array;
>+      struct mutex diagnose_lock; /* lock parallel read/write from diagnose 
>buffers */
>+      void *priv;
>+      const struct devlink_health_reporter_ops *ops;
>+      struct devlink *devlink;
>+      u64 graceful_period;
>+      bool auto_recover;
>+      u8 health_state;
>+};
>+
>+void *
>+devlink_health_reporter_priv(struct devlink_health_reporter *reporter)
>+{
>+      return reporter->priv;
>+}
>+EXPORT_SYMBOL_GPL(devlink_health_reporter_priv);
>+
>+static struct devlink_health_reporter *
>+devlink_health_reporter_find_by_name(struct devlink *devlink,
>+                                   const char *reporter_name)
>+{
>+      struct devlink_health_reporter *reporter;
>+
>+      list_for_each_entry(reporter, &devlink->reporter_list, list)
>+              if (!strcmp(reporter->ops->name, reporter_name))
>+                      return reporter;
>+      return NULL;
>+}
>+
>+/**
>+ *    devlink_health_reporter_create - create devlink health reporter
>+ *
>+ *    @devlink: devlink
>+ *    @ops: ops
>+ *    @graceful_period: to avoid recovery loops, in msecs
>+ *    @auto_recover: auto recover when error occurs
>+ *    @priv: priv
>+ */
>+struct devlink_health_reporter *
>+devlink_health_reporter_create(struct devlink *devlink,
>+                             const struct devlink_health_reporter_ops *ops,
>+                             u64 graceful_period, bool auto_recover,
>+                             void *priv)
>+{
>+      struct devlink_health_reporter *reporter;
>+
>+      mutex_lock(&devlink->lock);
>+      if (devlink_health_reporter_find_by_name(devlink, ops->name)) {
>+              reporter = ERR_PTR(-EEXIST);
>+              goto unlock;
>+      }
>+
>+      if (WARN_ON(ops->dump && !ops->dump_size) ||
>+          WARN_ON(ops->diagnose && !ops->diagnose_size) ||
>+          WARN_ON(auto_recover && !ops->recover) ||
>+          WARN_ON(graceful_period && !ops->recover)) {
>+              reporter = ERR_PTR(-EINVAL);
>+              goto unlock;
>+      }
>+
>+      reporter = kzalloc(sizeof(*reporter), GFP_KERNEL);
>+      if (!reporter) {
>+              reporter = ERR_PTR(-ENOMEM);
>+              goto unlock;
>+      }
>+
>+      if (ops->dump) {
>+              reporter->dump_buffers_array =
>+                      devlink_health_buffers_create(ops->dump_size);
>+              if (!reporter->dump_buffers_array) {
>+                      kfree(reporter);
>+                      reporter = ERR_PTR(-ENOMEM);
>+                      goto unlock;
>+              }
>+      }
>+
>+      if (ops->diagnose) {
>+              reporter->diagnose_buffers_array =
>+                      devlink_health_buffers_create(ops->diagnose_size);
>+              if (!reporter->diagnose_buffers_array) {
>+                      
>devlink_health_buffers_destroy(reporter->dump_buffers_array,
>+                                                     
>DEVLINK_HEALTH_SIZE_TO_BUFFERS(ops->dump_size));

This is just ugly. :/

As I wrote in the other email, should be converted to simple
"msg_ctx = msg_ctx_create(), msg_ctx_destroy(msg_ctx)", no sizes, no
buffers visible to the driver.


>+                      kfree(reporter);
>+                      reporter = ERR_PTR(-ENOMEM);
>+                      goto unlock;
>+              }
>+      }
>+
>+      list_add_tail(&reporter->list, &devlink->reporter_list);
>+      mutex_init(&reporter->dump_lock);
>+      mutex_init(&reporter->diagnose_lock);
>+
>+      reporter->priv = priv;
>+      reporter->ops = ops;
>+      reporter->devlink = devlink;
>+      reporter->graceful_period = graceful_period;
>+      reporter->auto_recover = auto_recover;
>+unlock:
>+      mutex_unlock(&devlink->lock);
>+      return reporter;
>+}
>+EXPORT_SYMBOL_GPL(devlink_health_reporter_create);
>+
>+/**
>+ *    devlink_health_reporter_destroy - destroy devlink health reporter
>+ *
>+ *    @reporter: devlink health reporter to destroy
>+ */
>+void
>+devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
>+{
>+      mutex_lock(&reporter->devlink->lock);
>+      list_del(&reporter->list);
>+      devlink_health_buffers_destroy(reporter->dump_buffers_array,
>+                                     
>DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->dump_size));
>+      devlink_health_buffers_destroy(reporter->diagnose_buffers_array,
>+                                     
>DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->diagnose_size));
>+      kfree(reporter);
>+      mutex_unlock(&reporter->devlink->lock);
>+}
>+EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy);
>+
> static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
>       [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
>       [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
>@@ -4383,6 +4509,7 @@ struct devlink *devlink_alloc(const struct devlink_ops 
>*ops, size_t priv_size)
>       INIT_LIST_HEAD(&devlink->resource_list);
>       INIT_LIST_HEAD(&devlink->param_list);
>       INIT_LIST_HEAD(&devlink->region_list);
>+      INIT_LIST_HEAD(&devlink->reporter_list);
>       mutex_init(&devlink->lock);
>       return devlink;
> }
>-- 
>2.17.1
>

Reply via email to