On 7/18/2024 11:13 AM, Kamal Heib wrote:
Caution: This message originated from an External Source. Use proper caution 
when opening attachments, clicking links, or responding.


Add support for reporting fw status via the devlink health report.

Example:
  # devlink health show pci/0000:02:00.0 reporter fw
  pci/0000:02:00.0:
    reporter fw
      state healthy error 0 recover 0
  # devlink health diagnose pci/0000:02:00.0 reporter fw
  Mode: normal

Signed-off-by: Kamal Heib <kh...@redhat.com>
---
v2:
- Address comments from Jiri.
- Move the creation of the health report.
---
  drivers/net/ethernet/intel/i40e/i40e.h        |  1 +
  .../net/ethernet/intel/i40e/i40e_devlink.c    | 57 +++++++++++++++++++
  .../net/ethernet/intel/i40e/i40e_devlink.h    |  2 +
  drivers/net/ethernet/intel/i40e/i40e_main.c   | 14 +++++
  4 files changed, 74 insertions(+)


<snip>

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c 
b/drivers/net/ethernet/intel/i40e/i40e_main.c
index cbcfada7b357..b6b3ae299b28 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -15370,6 +15370,9 @@ static bool i40e_check_recovery_mode(struct i40e_pf *pf)
                 dev_crit(&pf->pdev->dev, "Firmware recovery mode detected. Limiting 
functionality.\n");
                 dev_crit(&pf->pdev->dev, "Refer to the Intel(R) Ethernet Adapters 
and Devices User Guide for details on firmware recovery mode.\n");
                 set_bit(__I40E_RECOVERY_MODE, pf->state);
+               if (pf->fw_health_report)
+                       devlink_health_report(pf->fw_health_report,
+                                             "recovery mode detected", pf);

                 return true;
         }
@@ -15810,6 +15813,13 @@ static int i40e_probe(struct pci_dev *pdev, const 
struct pci_device_id *ent)
         if (test_bit(__I40E_RECOVERY_MODE, pf->state))
                 return i40e_init_recovery_mode(pf, hw);

+       err = i40e_devlink_create_health_reporter(pf);
+       if (err) {
+               dev_err(&pdev->dev,
+                       "Failed to create health reporter %d\n", err);
+               goto err_health_reporter;

Do you really want to fail probe if creating this simple health reporter fails?

Thanks,

Brett

+       }
+
         err = i40e_init_lan_hmc(hw, hw->func_caps.num_tx_qp,
                                 hw->func_caps.num_rx_qp, 0, 0);
         if (err) {
@@ -16175,6 +16185,8 @@ static int i40e_probe(struct pci_dev *pdev, const 
struct pci_device_id *ent)
         (void)i40e_shutdown_lan_hmc(hw);
  err_init_lan_hmc:
         kfree(pf->qp_pile);
+       i40e_devlink_destroy_health_reporter(pf);
+err_health_reporter:
  err_sw_init:
  err_adminq_setup:
  err_pf_reset:
@@ -16209,6 +16221,8 @@ static void i40e_remove(struct pci_dev *pdev)

         i40e_devlink_unregister(pf);

+       i40e_devlink_destroy_health_reporter(pf);
+
         i40e_dbg_pf_exit(pf);

         i40e_ptp_stop(pf);
--
2.45.2


Reply via email to