This patch add support for the following commands:
Devlink health show [DEV reporter REPORTE_NAME],
Devlink health recover DEV reporter REPORTER_NAME,
Devlink health diagnose DEV reporter REPORTER_NAME,
Devlink health objdump show DEV reporter REPORTER_NAME,
Devlink health objdump clear DEV reporter REPORTER_NAME,
Devlink health set DEV reporter REPORTER_NAME NAME VALUE

 * Devlink health show command displays status and configuration info on
   specific reporter on a device or dump the info on all reporters on all
   devices.
 * Devlink health recover enables the user to initiate a
   recovery on a reporter. This operation will increment the recoveries
   counter displayed in the show command.
 * Devlink health diagnose enables the user to retrieve diagnostics data
   on a reporter on a device. The command's output is a free text defined
   by the reporter.
 * Devlink health objdump show displays the last saved objdump. Devlink
   health saves a single objdump. If an objdump is not already stored by
   the Devlink for this reporter, Devlink generates a new objdump. The
   objdump can be generated automatically when a reporter reports on an
   error or by the user's request.
   Objdump output is defined by the reporter.
 * Devlink health objdump clear, deletes the last saved objdump file.
 * Devlink health set, enables the user to configure:
        1) grace_period [msec] time interval between auto recoveries.
        2) auto_recover [true/false] whether the devlink should execute
        automatic recover on error.

Examples:
$devlink health show pci/0000:00:09.0 reporter TX
pci/0000:00:09.0:
name TX state healthy #err 0 #recover 1 last_objdump_ts N/A objdump_available 
false
  attributes:
    grace period 600 auto recover true
$devlink health diagnose pci/0000:00:09.0 reporter TX
SQ 0x9b: HW state: 1 stopped: 0
SQ 0x9f: HW state: 1 stopped: 0
SQ 0xa3: HW state: 1 stopped: 0
SQ 0xa7: HW state: 1 stopped: 0
SQ 0xab: HW state: 1 stopped: 0
$devlink health objdump show pci/0000:00:09.0 reporter TX
TX dump data
$devlink health objdump clear pci/0000:00:09.0 reporter TX
$devlink health set pci/0000:00:09.0 reporter TX grace_period 3500
$devlink health set pci/0000:00:09.0 reporter TX auto_recover false

Signed-off-by: Aya Levin <a...@mellanox.com>
---
 devlink/devlink.c            | 531 ++++++++++++++++++++++++++++++++++++++++++-
 include/uapi/linux/devlink.h |  29 +++
 man/man8/devlink-health.8    | 175 ++++++++++++++
 man/man8/devlink.8           |   7 +-
 4 files changed, 739 insertions(+), 3 deletions(-)
 create mode 100644 man/man8/devlink-health.8

diff --git a/devlink/devlink.c b/devlink/devlink.c
index 3651e90c1159..24dd852d8478 100644
--- a/devlink/devlink.c
+++ b/devlink/devlink.c
@@ -40,6 +40,11 @@
 #define PARAM_CMODE_DRIVERINIT_STR "driverinit"
 #define PARAM_CMODE_PERMANENT_STR "permanent"
 
+#define HEALTH_REPORTER_STATE_HEALTHY_STR "healthy"
+#define HEALTH_REPORTER_STATE_ERROR_STR "error"
+#define HEALTH_REPORTER_GRACE_PERIOD_STR "grace_period"
+#define HEALTH_REPORTER_AUTO_RECOVER_STR "auto_recover"
+
 static int g_new_line_count;
 
 #define pr_err(args...) fprintf(stderr, ##args)
@@ -199,6 +204,10 @@ static void ifname_map_free(struct ifname_map *ifname_map)
 #define DL_OPT_REGION_SNAPSHOT_ID      BIT(22)
 #define DL_OPT_REGION_ADDRESS          BIT(23)
 #define DL_OPT_REGION_LENGTH           BIT(24)
+#define DL_OPT_HANDLE_HEALTH           BIT(25)
+#define DL_OPT_HEALTH_REPORTER_NAME    BIT(26)
+#define DL_OPT_HEALTH_REPORTER_DEV     BIT(27)
+
 
 struct dl_opts {
        uint32_t present; /* flags of present items */
@@ -230,6 +239,10 @@ struct dl_opts {
        uint32_t region_snapshot_id;
        uint64_t region_address;
        uint64_t region_length;
+       const char *reporter_name;
+       const char *reporter_param_name;
+       const char *reporter_param_value;
+
 };
 
 struct dl {
@@ -959,7 +972,7 @@ static int dl_argv_parse(struct dl *dl, uint32_t o_required,
                if (err)
                        return err;
                o_found |= handle_bit;
-       } else if (o_required & DL_OPT_HANDLE) {
+       } else if (DL_OPT_HANDLE) {
                err = dl_argv_handle(dl, &opts->bus_name, &opts->dev_name);
                if (err)
                        return err;
@@ -1178,6 +1191,15 @@ static int dl_argv_parse(struct dl *dl, uint32_t 
o_required,
                        if (err)
                                return err;
                        o_found |= DL_OPT_REGION_LENGTH;
+               } else if (dl_argv_match(dl, "reporter") &&
+                          (o_all & DL_OPT_HEALTH_REPORTER_NAME)) {
+                       dl_arg_inc(dl);
+                       err = dl_argv_str(dl, &opts->reporter_name);
+                       if (err)
+                               return err;
+                       o_found |= DL_OPT_HEALTH_REPORTER_NAME;
+                       o_found |= DL_OPT_HANDLE;
+                       break;
                } else {
                        pr_err("Unknown option \"%s\"\n", dl_argv(dl));
                        return -EINVAL;
@@ -1298,6 +1320,12 @@ static int dl_argv_parse(struct dl *dl, uint32_t 
o_required,
                return -EINVAL;
        }
 
+       if ((o_required & DL_OPT_HEALTH_REPORTER_NAME) &&
+           !(o_found & DL_OPT_HEALTH_REPORTER_NAME)) {
+               pr_err("Reporter name expected.\n");
+               return -EINVAL;
+       }
+
        return 0;
 }
 
@@ -1382,6 +1410,9 @@ static void dl_opts_put(struct nlmsghdr *nlh, struct dl 
*dl)
        if (opts->present & DL_OPT_REGION_LENGTH)
                mnl_attr_put_u64(nlh, DEVLINK_ATTR_REGION_CHUNK_LEN,
                                 opts->region_length);
+       if (opts->present & DL_OPT_HEALTH_REPORTER_NAME)
+               mnl_attr_put_strz(nlh, DEVLINK_ATTR_HEALTH_REPORTER_NAME,
+                                 opts->reporter_name);
 }
 
 static int dl_argv_parse_put(struct nlmsghdr *nlh, struct dl *dl,
@@ -1513,6 +1544,8 @@ static void __pr_out_handle_start(struct dl *dl, struct 
nlattr **tb,
                                __pr_out_newline();
                                __pr_out_indent_inc();
                                arr_last_handle_set(dl, bus_name, dev_name);
+                       } else {
+                               __pr_out_indent_inc();
                        }
                } else {
                        pr_out("%s%s", buf, content ? ":" : "");
@@ -5557,11 +5590,501 @@ static int cmd_region(struct dl *dl)
        return -ENOENT;
 }
 
+static int cmd_health_set_params(struct dl *dl)
+{
+       struct nlmsghdr *nlh;
+       uint64_t period;
+       bool auto_recover;
+       int err;
+
+       nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_HEALTH_REPORTER_SET,
+                              NLM_F_REQUEST | NLM_F_ACK);
+       err = dl_argv_parse(dl, DL_OPT_HANDLE |
+                           DL_OPT_HEALTH_REPORTER_NAME, 0);
+       if (err)
+               return err;
+
+       err = dl_argv_str(dl, &dl->opts.reporter_param_name);
+       if (err)
+               return err;
+       err = dl_argv_str(dl, &dl->opts.reporter_param_value);
+       if (err)
+               return err;
+       dl_opts_put(nlh, dl);
+
+       if (!strncmp(dl->opts.reporter_param_name,
+                    HEALTH_REPORTER_GRACE_PERIOD_STR, strlen("garce"))) {
+               err = strtouint64_t(dl->opts.reporter_param_value, &period);
+               if (err)
+                       goto err_param_value_parse;
+               mnl_attr_put_u64(nlh, DEVLINK_ATTR_HEALTH_REPORTER_PERIOD,
+                                period);
+       } else if (!strncmp(dl->opts.reporter_param_name,
+                           HEALTH_REPORTER_AUTO_RECOVER_STR,
+                           strlen("auto"))) {
+               err = strtobool(dl->opts.reporter_param_value, &auto_recover);
+               if (err)
+                       goto err_param_value_parse;
+               mnl_attr_put_u8(nlh, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_REC,
+                               (uint8_t)auto_recover);
+       } else {
+               printf("Parameter name: %s  is not supported\n",
+                      dl->opts.reporter_param_name);
+               return -ENOTSUP;
+       }
+
+       return _mnlg_socket_sndrcv(dl->nlg, nlh, NULL, NULL);
+
+err_param_value_parse:
+       pr_err("Value \"%s\" is not a number or not within range\n",
+              dl->opts.param_value);
+       return err;
+}
+
+static int cmd_health_objdump_clear(struct dl *dl)
+{
+       struct nlmsghdr *nlh;
+       int err;
+
+       nlh = mnlg_msg_prepare(dl->nlg,
+                              DEVLINK_CMD_HEALTH_REPORTER_OBJDUMP_CLEAR,
+                              NLM_F_REQUEST | NLM_F_ACK);
+
+       err = dl_argv_parse_put(nlh, dl, DL_OPT_HANDLE |
+                               DL_OPT_HEALTH_REPORTER_NAME, 0);
+       if (err)
+               return err;
+
+       dl_opts_put(nlh, dl);
+       return _mnlg_socket_sndrcv(dl->nlg, nlh, NULL, NULL);
+}
+
+static int health_value_show(struct dl *dl, int type, struct nlattr *nl_data)
+{
+       const char *str;
+       uint8_t *data;
+       uint32_t len;
+       uint64_t u64;
+       uint32_t u32;
+       uint16_t u16;
+       uint8_t u8;
+       int i;
+
+       switch (type) {
+       case MNL_TYPE_FLAG:
+               if (dl->json_output)
+                       jsonw_string(dl->jw, nl_data ? "true" : "false");
+               else
+                       pr_out("%s ", nl_data ? "true" : "false");
+               break;
+       case MNL_TYPE_U8:
+               u8 = mnl_attr_get_u8(nl_data);
+               if (dl->json_output)
+                       jsonw_uint(dl->jw, u8);
+               else
+                       pr_out("%u ", u8);
+               break;
+       case MNL_TYPE_U16:
+               u16 = mnl_attr_get_u16(nl_data);
+               if (dl->json_output)
+                       jsonw_uint(dl->jw, u16);
+               else
+                       pr_out("%u ", u16);
+               break;
+       case MNL_TYPE_U32:
+               u32 = mnl_attr_get_u32(nl_data);
+               if (dl->json_output)
+                       jsonw_uint(dl->jw, u32);
+               else
+                       pr_out("%u ", u32);
+               break;
+       case MNL_TYPE_U64:
+               u64 = mnl_attr_get_u64(nl_data);
+               if (dl->json_output)
+                       jsonw_u64(dl->jw, u64);
+               else
+                       pr_out("%lu ", u64);
+               break;
+       case MNL_TYPE_STRING:
+       case MNL_TYPE_NUL_STRING:
+               str = mnl_attr_get_str(nl_data);
+               if (dl->json_output)
+                       jsonw_string(dl->jw, str);
+               else
+                       pr_out("%s ", str);
+               break;
+       case MNL_TYPE_BINARY:
+               len = mnl_attr_get_payload_len(nl_data);
+               data = mnl_attr_get_payload(nl_data);
+               i = 0;
+               while (i < len) {
+                       if (dl->json_output)
+                               jsonw_printf(dl->jw, "%d", data[i]);
+                       else
+                               pr_out("%02x ", data[i]);
+                       i++;
+               }
+               break;
+       default:
+               return -EINVAL;
+       }
+       return MNL_CB_OK;
+}
+
+static int health_object_pair_show(struct dl *dl, struct nlattr *nl)
+{
+       struct nlattr *nla_pair[DEVLINK_ATTR_MAX + 1] = {};
+       struct nlattr *nla_value[DEVLINK_ATTR_MAX + 1] = {};
+       struct nlattr *nla_value_data;
+       struct nlattr *nla_obj_pair;
+       struct nlattr *nla_object;
+       struct nlattr *nla_array;
+       struct nlattr *nla_val;
+       int err, type;
+       const char *name;
+
+       err = mnl_attr_parse_nested(nl, attr_cb, nla_pair);
+       if (err != MNL_CB_OK)
+               return -EINVAL;
+
+       if (!nla_pair[DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_NAME] ||
+           !nla_pair[DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE])
+               return -EINVAL;
+
+       name = 
mnl_attr_get_str(nla_pair[DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_NAME]);
+       if (dl->json_output)
+               jsonw_name(dl->jw, name);
+       else
+               pr_out("%s: ", name);
+
+       nla_val = nla_pair[DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE];
+       err = mnl_attr_parse_nested(nla_val, attr_cb, nla_value);
+       if (err != MNL_CB_OK)
+               return -EINVAL;
+
+       if (!nla_value[DEVLINK_ATTR_HEALTH_BUFFER_OBJECT] &&
+           !nla_value[DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_ARRAY] &&
+           !nla_value[DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE])
+               return -EINVAL;
+
+       if (nla_value[DEVLINK_ATTR_HEALTH_BUFFER_OBJECT]) {
+               nla_object = nla_value[DEVLINK_ATTR_HEALTH_BUFFER_OBJECT];
+               /*inside must be an object pair*/
+               if (dl->json_output)
+                       jsonw_start_object(dl->jw);
+
+               mnl_attr_for_each_nested(nla_obj_pair, nla_object) {
+                       err = health_object_pair_show(dl, nla_obj_pair);
+                       if (err != MNL_CB_OK)
+                               break;
+               }
+               if (dl->json_output)
+                       jsonw_end_object(dl->jw);
+       }
+       if (nla_value[DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE]) {
+               type = 
mnl_attr_get_u8(nla_value[DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE]);
+               if (!nla_value[DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA])
+                       return -EINVAL;
+               nla_value_data = 
nla_value[DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA];
+               if (type  == MNL_TYPE_FLAG)
+               health_value_show(dl, type, nla_value_data);
+       }
+       if (nla_value[DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_ARRAY]) {
+               if (dl->json_output)
+                       jsonw_start_array(dl->jw);
+               nla_array = 
nla_value[DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_ARRAY];
+               mnl_attr_for_each_nested(nla_object, nla_array) {
+                       mnl_attr_for_each_nested(nla_obj_pair, nla_object) {
+                               err = health_object_pair_show(dl, nla_obj_pair);
+                               if (err != MNL_CB_OK)
+                                       break;
+                       }
+               }
+               if (dl->json_output)
+                       jsonw_end_array(dl->jw);
+       }
+       return err;
+}
+
+static int cmd_health_object_cb(const struct nlmsghdr *nlh, void *data)
+{
+       struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh);
+       struct nlattr *tb[DEVLINK_ATTR_MAX + 1] = {};
+       struct nlattr *nla_object;
+       struct nlattr *nla_object_pair;
+       struct dl *dl = data;
+       int err = MNL_CB_OK;
+
+       mnl_attr_parse(nlh, sizeof(*genl), attr_cb, tb);
+       if (!tb[DEVLINK_ATTR_HEALTH_BUFFER_OBJECT])
+               return MNL_CB_ERROR;
+
+       mnl_attr_for_each(nla_object, nlh, sizeof(*genl)) {
+               mnl_attr_for_each_nested(nla_object_pair, nla_object) {
+                       err = health_object_pair_show(dl, nla_object_pair);
+                       if (err != MNL_CB_OK)
+                               break;
+               }
+               if (!dl->json_output)
+                       pr_out("\n");
+       }
+
+       return err;
+}
+
+static int cmd_health_objdump_show(struct dl *dl)
+{
+       struct nlmsghdr *nlh;
+       int err;
+
+       nlh = mnlg_msg_prepare(dl->nlg,
+                              DEVLINK_CMD_HEALTH_REPORTER_OBJDUMP_GET,
+                              NLM_F_REQUEST | NLM_F_ACK);
+
+       err = dl_argv_parse_put(nlh, dl, DL_OPT_HANDLE |
+                               DL_OPT_HEALTH_REPORTER_NAME, 0);
+       if (err)
+               return err;
+
+       if (dl->json_output)
+               jsonw_start_object(dl->jw);
+
+       err = _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_health_object_cb, dl);
+       if (err) {
+               jsonw_end_object(dl->jw);
+               return err;
+       }
+
+       if (dl->json_output)
+               jsonw_end_object(dl->jw);
+
+       return err;
+}
+
+static int cmd_health_diagnose(struct dl *dl)
+{
+       struct nlmsghdr *nlh;
+       int err;
+
+       nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE,
+                              NLM_F_REQUEST | NLM_F_ACK);
+
+       err = dl_argv_parse_put(nlh, dl, DL_OPT_HANDLE |
+                               DL_OPT_HEALTH_REPORTER_NAME, 0);
+       if (err)
+               return err;
+
+       if (dl->json_output)
+               jsonw_start_object(dl->jw);
+
+       err = _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_health_object_cb, dl);
+       if (err) {
+               jsonw_end_object(dl->jw);
+               return err;
+       }
+
+       if (dl->json_output)
+               jsonw_end_object(dl->jw);
+
+       return err;
+}
+
+static int cmd_health_recover(struct dl *dl)
+{
+       struct nlmsghdr *nlh;
+       int err;
+
+       nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_HEALTH_REPORTER_RECOVER,
+                              NLM_F_REQUEST | NLM_F_ACK);
+
+       err = dl_argv_parse_put(nlh, dl, DL_OPT_HANDLE |
+                               DL_OPT_HEALTH_REPORTER_NAME, 0);
+       if (err)
+               return err;
+
+       dl_opts_put(nlh, dl);
+       return _mnlg_socket_sndrcv(dl->nlg, nlh, NULL, NULL);
+}
+
+static void pr_out_array_parameters_start(struct dl *dl, const char *name)
+{
+       if (dl->json_output) {
+               jsonw_name(dl->jw, name);
+               jsonw_start_array(dl->jw);
+       } else {
+               __pr_out_newline();
+               pr_out("%s:", name);
+                __pr_out_indent_inc();
+               __pr_out_newline();
+       }
+}
+
+static const char *health_state_name(uint8_t state)
+{
+       switch (state) {
+       case DEVLINK_HEALTH_REPORTER_STATE_HEALTHY:
+               return HEALTH_REPORTER_STATE_HEALTHY_STR;
+       case DEVLINK_HEALTH_REPORTER_STATE_ERROR:
+               return HEALTH_REPORTER_STATE_ERROR_STR;
+       default: return "<unknown state>";
+       }
+}
+
+static void pr_out_health(struct dl *dl, struct nlattr **tb)
+{
+       struct nlattr *hlt[DEVLINK_ATTR_MAX + 1] = {};
+       enum devlink_health_reporter_state state;
+       char dump_time_date[80] = "N/A";
+       bool auto_recover = false;
+       const struct nlattr *attr;
+       bool dmp = false;
+       struct timeval tv;
+       uint64_t jiffies;
+       struct tm *info;
+       int err;
+
+       state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY;
+
+       err = mnl_attr_parse_nested(tb[DEVLINK_ATTR_HEALTH_REPORTER], attr_cb,
+                                   hlt);
+       if (err != MNL_CB_OK)
+               return;
+
+       if (!hlt[DEVLINK_ATTR_HEALTH_REPORTER_NAME]             ||
+           !hlt[DEVLINK_ATTR_HEALTH_REPORTER_ERR]              ||
+           !hlt[DEVLINK_ATTR_HEALTH_REPORTER_RECOVER]          ||
+           !hlt[DEVLINK_ATTR_HEALTH_REPORTER_OBJDUMP_AVAIL]    ||
+           !hlt[DEVLINK_ATTR_HEALTH_REPORTER_STATE]            ||
+           !hlt[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_REC]         ||
+           !hlt[DEVLINK_ATTR_HEALTH_REPORTER_PERIOD])
+               return;
+
+       dmp = mnl_attr_get_u8(hlt[DEVLINK_ATTR_HEALTH_REPORTER_OBJDUMP_AVAIL]);
+       if (dmp) {
+               if (hlt[DEVLINK_ATTR_HEALTH_REPORTER_OBJDUMP_TS]) {
+                       attr = hlt[DEVLINK_ATTR_HEALTH_REPORTER_OBJDUMP_TS];
+                       jiffies = mnl_attr_get_u64(attr);
+                       __jiffies_to_tv(&tv, jiffies);
+                       info = localtime(&tv.tv_sec);
+                       strftime(dump_time_date, 80, "%b %d %l:%M:%S", info);
+               }
+       }
+
+       pr_out_handle_start_arr(dl, tb);
+
+       pr_out_str(dl, "name",
+                  mnl_attr_get_str(hlt[DEVLINK_ATTR_HEALTH_REPORTER_NAME]));
+       state = mnl_attr_get_u8(hlt[DEVLINK_ATTR_HEALTH_REPORTER_STATE]);
+       pr_out_str(dl, "state", health_state_name(state));
+       pr_out_u64(dl, "#err",
+                  mnl_attr_get_u64(hlt[DEVLINK_ATTR_HEALTH_REPORTER_ERR]));
+       pr_out_u64(dl, "#recover",
+                  mnl_attr_get_u64(hlt[DEVLINK_ATTR_HEALTH_REPORTER_RECOVER]));
+       pr_out_str(dl, "last_objdump_ts", dump_time_date);
+       pr_out_bool(dl, "objdump_available", dmp);
+       pr_out_array_parameters_start(dl, "parameters");
+       pr_out_entry_start(dl);
+       pr_out_u64(dl, "grace_period",
+                  mnl_attr_get_u64(hlt[DEVLINK_ATTR_HEALTH_REPORTER_PERIOD]));
+       auto_recover = 
mnl_attr_get_u8(hlt[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_REC]);
+       pr_out_bool(dl, "auto_recover", auto_recover);
+       pr_out_entry_end(dl);
+       pr_out_array_end(dl);
+
+       pr_out_handle_end(dl);
+}
+
+static int cmd_health_show_cb(const struct nlmsghdr *nlh, void *data)
+{
+       struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh);
+       struct nlattr *tb[DEVLINK_ATTR_MAX + 1] = {};
+       struct dl *dl = data;
+
+       mnl_attr_parse(nlh, sizeof(*genl), attr_cb, tb);
+       if (!tb[DEVLINK_ATTR_BUS_NAME] || !tb[DEVLINK_ATTR_DEV_NAME] ||
+           !tb[DEVLINK_ATTR_HEALTH_REPORTER])
+               return MNL_CB_ERROR;
+
+       pr_out_health(dl, tb);
+
+       return MNL_CB_OK;
+}
+
+static int cmd_health_show(struct dl *dl)
+{
+       struct nlmsghdr *nlh;
+       uint16_t flags = NLM_F_REQUEST | NLM_F_ACK;
+       int err;
+
+       if (dl_argc(dl) == 0)
+               flags |= NLM_F_DUMP;
+       nlh = mnlg_msg_prepare(dl->nlg, DEVLINK_CMD_HEALTH_REPORTER_GET,
+                              flags);
+
+       if (dl_argc(dl) > 0) {
+               err = dl_argv_parse_put(nlh, dl, 0, DL_OPT_HANDLE |
+                                       DL_OPT_HEALTH_REPORTER_NAME);
+               if (err)
+                       return err;
+       }
+       pr_out_section_start(dl, "health");
+
+       err = _mnlg_socket_sndrcv(dl->nlg, nlh, cmd_health_show_cb, dl);
+       if (err)
+               return err;
+       pr_out_section_end(dl);
+       return err;
+}
+
+static void cmd_health_help(void)
+{
+       pr_err("Usage: devlink health show [ dev DEV reporter REPORTER_NAME 
]\n");
+       pr_err("Usage: devlink health recover DEV reporter REPORTER_NAME\n");
+       pr_err("Usage: devlink health diagnose DEV reporter REPORTER_NAME\n");
+       pr_err("Usage: devlink health objdump show DEV reporter 
REPORTER_NAME\n");
+       pr_err("Usage: devlink health objdump clear DEV reporter 
REPORTER_NAME\n");
+       pr_err("Usage: devlink health set DEV reporter REPORTER_NAME NAME 
VALUE\n");
+}
+
+static int cmd_health(struct dl *dl)
+{
+       if (dl_no_arg(dl)) {
+               return cmd_health_show(dl);
+       } else if (dl_argv_match(dl, "help")) {
+               cmd_health_help();
+               return 0;
+       } else if (dl_argv_match(dl, "show")) {
+               dl_arg_inc(dl);
+               return cmd_health_show(dl);
+       } else if (dl_argv_match(dl, "recover")) {
+               dl_arg_inc(dl);
+               return cmd_health_recover(dl);
+       } else if (dl_argv_match(dl, "diagnose")) {
+               dl_arg_inc(dl);
+               return cmd_health_diagnose(dl);
+       } else if (dl_argv_match(dl, "objdump")) {
+               dl_arg_inc(dl);
+               if (dl_argv_match(dl, "show")) {
+                       dl_arg_inc(dl);
+                       return cmd_health_objdump_show(dl);
+               } else if (dl_argv_match(dl, "clear")) {
+                       dl_arg_inc(dl);
+                       return cmd_health_objdump_clear(dl);
+               }
+       } else if (dl_argv_match(dl, "set")) {
+               dl_arg_inc(dl);
+               return cmd_health_set_params(dl);
+       }
+
+       pr_err("Command \"%s\" not found\n", dl_argv(dl));
+       return -ENOENT;
+}
+
 static void help(void)
 {
        pr_err("Usage: devlink [ OPTIONS ] OBJECT { COMMAND | help }\n"
               "       devlink [ -f[orce] ] -b[atch] filename\n"
-              "where  OBJECT := { dev | port | sb | monitor | dpipe | resource 
| region }\n"
+              "where  OBJECT := { dev | port | sb | monitor | dpipe | resource 
| region | health }\n"
               "       OPTIONS := { -V[ersion] | -n[o-nice-names] | -j[son] | 
-p[retty] | -v[erbose] }\n");
 }
 
@@ -5594,7 +6117,11 @@ static int dl_cmd(struct dl *dl, int argc, char **argv)
        } else if (dl_argv_match(dl, "region")) {
                dl_arg_inc(dl);
                return cmd_region(dl);
+       } else if (dl_argv_match(dl, "health")) {
+               dl_arg_inc(dl);
+               return cmd_health(dl);
        }
+
        pr_err("Object \"%s\" not found\n", dl_argv(dl));
        return -ENOENT;
 }
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index d0a33d79dc22..2f3bbf39e887 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -89,6 +89,12 @@ enum devlink_command {
        DEVLINK_CMD_REGION_DEL,
        DEVLINK_CMD_REGION_READ,
 
+       DEVLINK_CMD_HEALTH_REPORTER_GET,
+       DEVLINK_CMD_HEALTH_REPORTER_SET,
+       DEVLINK_CMD_HEALTH_REPORTER_RECOVER,
+       DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE,
+       DEVLINK_CMD_HEALTH_REPORTER_OBJDUMP_GET,
+       DEVLINK_CMD_HEALTH_REPORTER_OBJDUMP_CLEAR,
        /* add new commands above here */
        __DEVLINK_CMD_MAX,
        DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1
@@ -168,6 +174,11 @@ enum devlink_param_fw_load_policy_value {
        DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH,
 };
 
+enum devlink_health_reporter_state {
+       DEVLINK_HEALTH_REPORTER_STATE_HEALTHY,
+       DEVLINK_HEALTH_REPORTER_STATE_ERROR,
+};
+
 enum devlink_attr {
        /* don't change the order or add anything between, this is ABI! */
        DEVLINK_ATTR_UNSPEC,
@@ -285,6 +296,24 @@ enum devlink_attr {
        DEVLINK_ATTR_REGION_CHUNK_ADDR,         /* u64 */
        DEVLINK_ATTR_REGION_CHUNK_LEN,          /* u64 */
 
+       DEVLINK_ATTR_HEALTH_BUFFER_OBJECT,              /* nested */
+       DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR,         /* nested */
+       DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_NAME,         /* string */
+       DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE,        /* nested */
+       DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_ARRAY,  /* nested */
+       DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE,   /* u8 */
+       DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA,   /* dynamic */
+
+       DEVLINK_ATTR_HEALTH_REPORTER,                   /* nested */
+       DEVLINK_ATTR_HEALTH_REPORTER_NAME,              /* string */
+       DEVLINK_ATTR_HEALTH_REPORTER_STATE,             /* u8 */
+       DEVLINK_ATTR_HEALTH_REPORTER_ERR,               /* u64 */
+       DEVLINK_ATTR_HEALTH_REPORTER_RECOVER,           /* u64 */
+       DEVLINK_ATTR_HEALTH_REPORTER_OBJDUMP_AVAIL,     /* u8 */
+       DEVLINK_ATTR_HEALTH_REPORTER_OBJDUMP_TS,        /* u64 */
+       DEVLINK_ATTR_HEALTH_REPORTER_PERIOD,            /* u64 */
+       DEVLINK_ATTR_HEALTH_REPORTER_AUTO_REC,          /* u8 */
+
        /* add new attributes above here, update the policy in devlink.c */
 
        __DEVLINK_ATTR_MAX,
diff --git a/man/man8/devlink-health.8 b/man/man8/devlink-health.8
new file mode 100644
index 000000000000..19de4079ff67
--- /dev/null
+++ b/man/man8/devlink-health.8
@@ -0,0 +1,175 @@
+.TH DEVLINK\-HEALTH 8 "27 Dec 2018" "iproute2" "Linux"
+.SH NAME
+devlink-health \- devlink health reporting and recovery
+.SH SYNOPSIS
+.sp
+.ad l
+.in +8
+.ti -8
+.B devlink
+.RI "[ " OPTIONS " ]"
+.B health
+.RI  " { " COMMAND " | "
+.BR help " }"
+.sp
+.ti -8
+.IR OPTIONS " := { "
+\fB\-V\fR[\fIersion\fR] }
+.ti -8
+.BR "devlink health show"
+.RI "[ "
+.RI "" DEV ""
+.BR " reporter "
+.RI ""REPORTER " ] "
+.ti -8
+.BR "devlink health recover"
+.RI "" DEV ""
+.BR "reporter"
+.RI "" REPORTER ""
+.ti -8
+.BR "devlink health diagnose"
+.RI "" DEV ""
+.BR "reporter"
+.RI "" REPORTER ""
+.ti -8
+.BR "devlink health objdump show"
+.RI "" DEV ""
+.BR "reporter"
+.RI "" REPORTER ""
+.ti -8
+.BR "devlink health objdump clear"
+.RI "" DEV ""
+.BR "reporter"
+.RI "" REPORTER ""
+.ti -8
+.BR "devlink health set"
+.RI "" DEV ""
+.BR "reporter"
+.RI "" REPORTER ""
+.RI "" NAME ""
+.RI "" VALUE ""
+.ti -8
+.B devlink health help
+.SH "DESCRIPTION"
+.SS devlink health show - Show status and configuration on all supported 
reporters on all devlink devices.
+.PP
+.I "DEV"
+- specifies the devlink device.
+.PP
+.I "REPORTER"
+- specifies the reporter's name registered on the devlink device.
+.SS devlink health recover - Initiate a recovery operation on a reporter.
+This action performs a recovery and increases the recoveries counter on 
success.
+.PP
+.I "DEV"
+- specifies the devlink device.
+.PP
+.I "REPORTER"
+- specifies the reporter's name registered on the devlink device.
+.SS devlink health diagnose - Retrieve diagnostics data on a reporter.
+.PP
+.I "DEV"
+- specifies the devlink device.
+.PP
+.I "REPORTER"
+- specifies the reporter's name registered on the devlink device.
+.SS devlink health objdump show - Display the last saved objdump.
+.PD 0
+.P
+Devlink health saves a single objdump per reporter. If an objdump is
+.P
+not already stored by the Devlink, this command will generate a new
+.P
+objdump. The objdump can be generated either automatically when a
+.P
+reporter reports on an error or manually at the user's request.
+.PD
+.PP
+.I "DEV"
+- specifies the devlink device.
+.PP
+.I "REPORTER"
+- specifies the reporter's name registered on the devlink device.
+.SS devlink health objdump clear - Delete the saved objdump.
+Deleting the save objdump enables a generation of a new objdump on
+.PD 0
+.P
+the next "devlink health objdump show" command.
+.PD
+.PP
+.I "DEV"
+- specifies the devlink device.
+.PP
+.I "REPORTER"
+- specifies the reporter's name registered on the devlink device.
+.SS devlink health set - Enable the user to configure:
+.PD 0
+1) grace_period [msec] - Time interval between auto recoveries.
+.P
+2) auto_recover [true/false] - Indicates whether the devlink should execute 
automatic recover on error.
+.PD
+.PP
+.I "DEV"
+- specifies the devlink device.
+.PP
+.I "REPORTER"
+- specifies the reporter's name registered on the devlink device.
+.SH "EXAMPLES"
+.PP
+devlink health show
+.RS 4
+pci/0000:00:09.0:
+  name TX state healthy #err 1 #recover 1 last_objdump_ts N/A 
objdump_available false
+  attributes:
+    grace period 600 auto recover true
+.RE
+.PP
+devlink health recover pci/0000:00:09.0 reporter TX
+.RS 4
+Initiate recovery on TX reporter registered on pci/0000:00:09.0.
+.RE
+.PP
+devlink health diagnose pci/0000:00:09.0 reporter TX
+.RS 4
+.PD 0
+SQ 0x9b: HW state: 1 stopped: 0
+.P
+SQ 0x9f: HW state: 1 stopped: 0
+.P
+SQ 0xa3: HW state: 1 stopped: 0
+.P
+SQ 0xa7: HW state: 1 stopped: 0
+.P
+SQ 0xab: HW state: 1 stopped: 0
+.PD
+.RE
+.PP
+devlink health objdump show  pci/0000:00:09.0 reporter TX
+.RS 4
+Display the last saved objdump on TX reporter registered on pci/0000:00:09.0.
+.RE
+.PP
+devlink health objdump clear pci/0000:00:09.0 reporter TX
+.RS 4
+Delete saved objdump on TX reporter registered on pci/0000:00:09.0.
+.RE
+.PP
+devlink health set pci/0000:00:09.0 reporter TX grace_period 3500
+.RS 4
+Set time interval between auto recoveries to minimum of 3500 mSec on
+TX reporter registered on pci/0000:00:09.0.
+.RE
+.PP
+devlink health set pci/0000:00:09.0 reporter TX auto_recover false
+.RS 4
+Turn off auto recovery on TX reporter registered on pci/0000:00:09.0.
+.RE
+.SH SEE ALSO
+.BR devlink (8),
+.BR devlink-dev (8),
+.BR devlink-port (8),
+.BR devlink-region (8),
+.br
+
+.SH AUTHOR
+Aya Levin <a...@mellanox.com>
diff --git a/man/man8/devlink.8 b/man/man8/devlink.8
index 8d527e7e1d60..13d4dcd908b3 100644
--- a/man/man8/devlink.8
+++ b/man/man8/devlink.8
@@ -7,7 +7,7 @@ devlink \- Devlink tool
 .in +8
 .ti -8
 .B devlink
-.RI "[ " OPTIONS " ] { " dev | port | monitor | sb | resource | region " } { " 
COMMAND " | "
+.RI "[ " OPTIONS " ] { " dev | port | monitor | sb | resource | region | 
health " } { " COMMAND " | "
 .BR help " }"
 .sp
 
@@ -78,6 +78,10 @@ Turn on verbose output.
 .B region
 - devlink address region access
 
+.TP
+.B health
+- devlink reporting and recovery
+
 .SS
 .I COMMAND
 
@@ -109,6 +113,7 @@ Exit status is 0 if command was successful or a positive 
integer upon failure.
 .BR devlink-sb (8),
 .BR devlink-resource (8),
 .BR devlink-region (8),
+.BR devlink-health (8),
 .br
 
 .SH REPORTING BUGS
-- 
2.14.1

Reply via email to