From: Kalesh AP <kalesh-anakkur.pura...@broadcom.com> Periodically poll the FW heartbeat register and FW recovery counter registers to check the FW health. Polling frequency will be advertised by the FW in HWRM_ERROR_RECOVERY_QCFG response. Schedule the task upon receiving the async event from FW.
Signed-off-by: Kalesh AP <kalesh-anakkur.pura...@broadcom.com> Reviewed-by: Ajit Khaparde <ajit.khapa...@broadcom.com> Reviewed-by: Somnath Kotur <somnath.ko...@broadcom.com> --- drivers/net/bnxt/bnxt.h | 5 ++ drivers/net/bnxt/bnxt_cpr.c | 7 +++ drivers/net/bnxt/bnxt_ethdev.c | 89 ++++++++++++++++++++++++++++++++++ 3 files changed, 101 insertions(+) diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h index f9147a9a8..a23c4a64c 100644 --- a/drivers/net/bnxt/bnxt.h +++ b/drivers/net/bnxt/bnxt.h @@ -368,6 +368,9 @@ struct bnxt_error_recovery_info { #define BNXT_FLAG_MASTER_FUNC (1 << 2) #define BNXT_FLAG_RECOVERY_ENABLED (1 << 3) uint32_t flags; + + uint32_t last_heart_beat; + uint32_t last_reset_counter; }; /* address space location of register */ @@ -531,6 +534,8 @@ int bnxt_rcv_msg_from_vf(struct bnxt *bp, uint16_t vf_id, void *msg); int is_bnxt_in_error(struct bnxt *bp); int bnxt_map_fw_health_status_regs(struct bnxt *bp); +uint32_t bnxt_read_fw_status_reg(struct bnxt *bp, uint32_t index); +void bnxt_schedule_fw_health_check(struct bnxt *bp); bool is_bnxt_supported(struct rte_eth_dev *dev); bool bnxt_stratus_device(struct bnxt *bp); diff --git a/drivers/net/bnxt/bnxt_cpr.c b/drivers/net/bnxt/bnxt_cpr.c index 7f5b3314e..a692fbe7c 100644 --- a/drivers/net/bnxt/bnxt_cpr.c +++ b/drivers/net/bnxt/bnxt_cpr.c @@ -88,6 +88,13 @@ void bnxt_handle_async_event(struct bnxt *bp, PMD_DRV_LOG(INFO, "recovery enabled(%d), master function(%d)\n", bnxt_is_recovery_enabled(bp), bnxt_is_master_func(bp)); + + info->last_heart_beat = + bnxt_read_fw_status_reg(bp, BNXT_FW_HEARTBEAT_CNT_REG); + info->last_reset_counter = + bnxt_read_fw_status_reg(bp, BNXT_FW_RECOVERY_CNT_REG); + + bnxt_schedule_fw_health_check(bp); break; default: PMD_DRV_LOG(INFO, "handle_async_event id = 0x%x\n", event_id); diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c index 52c460d2c..0317eb888 100644 --- a/drivers/net/bnxt/bnxt_ethdev.c +++ b/drivers/net/bnxt/bnxt_ethdev.c @@ -169,6 +169,7 @@ static int bnxt_mtu_set_op(struct rte_eth_dev *eth_dev, uint16_t new_mtu); static int bnxt_dev_uninit(struct rte_eth_dev *eth_dev); static int bnxt_init_resources(struct bnxt *bp, bool reconfig_dev); static int bnxt_uninit_resources(struct bnxt *bp, bool reconfig_dev); +static void bnxt_cancel_fw_health_check(struct bnxt *bp); int is_bnxt_in_error(struct bnxt *bp) { @@ -880,6 +881,8 @@ static void bnxt_dev_stop_op(struct rte_eth_dev *eth_dev) /* disable uio/vfio intr/eventfd mapping */ rte_intr_disable(intr_handle); + bnxt_cancel_fw_health_check(bp); + bp->flags &= ~BNXT_FLAG_INIT_DONE; if (bp->eth_dev->data->dev_started) { /* TBD: STOP HW queues DMA */ @@ -3608,6 +3611,92 @@ int bnxt_dev_reset_and_resume(struct bnxt *bp) return rc; } +uint32_t bnxt_read_fw_status_reg(struct bnxt *bp, uint32_t index) +{ + struct bnxt_error_recovery_info *info = bp->recovery_info; + uint32_t reg = info->status_regs[index]; + uint32_t type, offset, val = 0; + + type = BNXT_FW_STATUS_REG_TYPE(reg); + offset = BNXT_FW_STATUS_REG_OFF(reg); + + switch (type) { + case BNXT_FW_STATUS_REG_TYPE_CFG: + rte_pci_read_config(bp->pdev, &val, sizeof(val), offset); + break; + case BNXT_FW_STATUS_REG_TYPE_GRC: + offset = info->mapped_status_regs[index]; + /* FALLTHROUGH */ + case BNXT_FW_STATUS_REG_TYPE_BAR0: + val = rte_le_to_cpu_32(rte_read32((uint8_t *)bp->bar0 + + offset)); + break; + } + + return val; +} + +/* Driver should poll FW heartbeat, reset_counter with the frequency + * advertised by FW in HWRM_ERROR_RECOVERY_QCFG. + * When the driver detects heartbeat stop or change in reset_counter, + * it has to trigger a reset to recover from the error condition. + * A “master PF” is the function who will have the privilege to + * initiate the chimp reset. The master PF will be elected by the + * firmware and will be notified through async message. + */ +static void bnxt_check_fw_health(void *arg) +{ + struct bnxt *bp = arg; + struct bnxt_error_recovery_info *info = bp->recovery_info; + uint32_t val = 0; + + if (!info || !bnxt_is_recovery_enabled(bp) || + is_bnxt_in_error(bp)) + return; + + val = bnxt_read_fw_status_reg(bp, BNXT_FW_HEARTBEAT_CNT_REG); + if (val == info->last_heart_beat) + goto reset; + + info->last_heart_beat = val; + + val = bnxt_read_fw_status_reg(bp, BNXT_FW_RECOVERY_CNT_REG); + if (val != info->last_reset_counter) + goto reset; + + info->last_reset_counter = val; + + rte_eal_alarm_set(US_PER_MS * info->driver_polling_freq, + bnxt_check_fw_health, (void *)bp); + + return; +reset: + /* Stop DMA to/from device */ + bp->flags |= BNXT_FLAG_FATAL_ERROR; + bp->flags |= BNXT_FLAG_FW_RESET; + + PMD_DRV_LOG(ERR, "Detected FW dead condition\n"); +} + +void bnxt_schedule_fw_health_check(struct bnxt *bp) +{ + uint32_t polling_freq = bp->recovery_info->driver_polling_freq; + + if (!bnxt_is_recovery_enabled(bp)) + return; + + rte_eal_alarm_set(US_PER_MS * polling_freq, + bnxt_check_fw_health, (void *)bp); +} + +static void bnxt_cancel_fw_health_check(struct bnxt *bp) +{ + if (!bnxt_is_recovery_enabled(bp)) + return; + + rte_eal_alarm_cancel(bnxt_check_fw_health, (void *)bp); +} + static bool bnxt_vf_pciid(uint16_t id) { if (id == BROADCOM_DEV_ID_57304_VF || -- 2.20.1 (Apple Git-117)