On Mon, May 12, 2025 at 12:57:54PM -0700, Haiyang Zhang wrote: > To collaborate with hardware servicing events, upon receiving the special > EQE notification from the HW channel, remove the devices on this bus. > Then, after a waiting period based on the device specs, rescan the parent > bus to recover the devices. > > Signed-off-by: Haiyang Zhang <haiya...@microsoft.com> > --- > v3: > Updated for checkpatch warnings as suggested by Simon Horman. > > v2: > Added dev_dbg for service type as suggested by Shradha Gupta. > Added driver cap bit. > > --- > .../net/ethernet/microsoft/mana/gdma_main.c | 64 +++++++++++++++++++ > include/net/mana/gdma.h | 11 +++- > 2 files changed, 73 insertions(+), 2 deletions(-) > > diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c > b/drivers/net/ethernet/microsoft/mana/gdma_main.c > index 4ffaf7588885..3102bd2b875b 100644 > --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c > +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c > @@ -352,11 +352,55 @@ void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit) > } > EXPORT_SYMBOL_NS(mana_gd_ring_cq, "NET_MANA"); > > +#define MANA_SERVICE_PERIOD 10 > + > +struct mana_serv_work { > + struct work_struct serv_work; > + struct pci_dev *pdev; > +}; > + > +static void mana_serv_func(struct work_struct *w) > +{ > + struct mana_serv_work *mns_wk; > + struct pci_bus *bus, *parent; > + struct pci_dev *pdev; > + > + mns_wk = container_of(w, struct mana_serv_work, serv_work); > + pdev = mns_wk->pdev; > + > + if (!pdev) > + goto out; > + > + bus = pdev->bus; > + if (!bus) { > + dev_err(&pdev->dev, "MANA service: no bus\n"); > + goto out; > + } > + > + parent = bus->parent; > + if (!parent) { > + dev_err(&pdev->dev, "MANA service: no parent bus\n"); > + goto out; > + } > + > + pci_stop_and_remove_bus_device_locked(bus->self); > + > + msleep(MANA_SERVICE_PERIOD * 1000); > + > + pci_lock_rescan_remove(); > + pci_rescan_bus(parent); > + pci_unlock_rescan_remove(); > + > +out: > + kfree(mns_wk); > +} > + > static void mana_gd_process_eqe(struct gdma_queue *eq) > { > u32 head = eq->head % (eq->queue_size / GDMA_EQE_SIZE); > struct gdma_context *gc = eq->gdma_dev->gdma_context; > struct gdma_eqe *eq_eqe_ptr = eq->queue_mem_ptr; > + struct mana_serv_work *mns_wk; > union gdma_eqe_info eqe_info; > enum gdma_eqe_type type; > struct gdma_event event; > @@ -400,6 +444,26 @@ static void mana_gd_process_eqe(struct gdma_queue *eq) > eq->eq.callback(eq->eq.context, eq, &event); > break; > > + case GDMA_EQE_HWC_FPGA_RECONFIG: > + case GDMA_EQE_HWC_SOCMANA_CRASH: > + dev_dbg(gc->dev, "Recv MANA service type:%d\n", type); > + > + if (gc->in_service) { > + dev_info(gc->dev, "Already in service\n"); > + break; > + } > + > + mns_wk = kzalloc(sizeof(*mns_wk), GFP_ATOMIC); > + if (!mns_wk) > + break; > + > + dev_info(gc->dev, "Start MANA service type:%d\n", type); > + gc->in_service = true; > + mns_wk->pdev = to_pci_dev(gc->dev); > + INIT_WORK(&mns_wk->serv_work, mana_serv_func); > + schedule_work(&mns_wk->serv_work); > + break; > + > default: > break; > } > diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h > index 228603bf03f2..d0fbc9c64cc8 100644 > --- a/include/net/mana/gdma.h > +++ b/include/net/mana/gdma.h > @@ -58,8 +58,9 @@ enum gdma_eqe_type { > GDMA_EQE_HWC_INIT_EQ_ID_DB = 129, > GDMA_EQE_HWC_INIT_DATA = 130, > GDMA_EQE_HWC_INIT_DONE = 131, > - GDMA_EQE_HWC_SOC_RECONFIG = 132, > + GDMA_EQE_HWC_FPGA_RECONFIG = 132, > GDMA_EQE_HWC_SOC_RECONFIG_DATA = 133, > + GDMA_EQE_HWC_SOCMANA_CRASH = 135, > GDMA_EQE_RNIC_QP_FATAL = 176, > }; > > @@ -388,6 +389,8 @@ struct gdma_context { > u32 test_event_eq_id; > > bool is_pf; > + bool in_service; > + > phys_addr_t bar0_pa; > void __iomem *bar0_va; > void __iomem *shm_base; > @@ -558,12 +561,16 @@ enum { > /* Driver can handle holes (zeros) in the device list */ > #define GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP BIT(11) > > +/* Driver can self reset on EQE notification */ > +#define GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE BIT(14) > + > #define GDMA_DRV_CAP_FLAGS1 \ > (GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \ > GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \ > GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG | \ > GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT | \ > - GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP) > + GDMA_DRV_CAP_FLAG_1_DEV_LIST_HOLES_SUP | \ > + GDMA_DRV_CAP_FLAG_1_SELF_RESET_ON_EQE) > > #define GDMA_DRV_CAP_FLAGS2 0 > > -- > 2.34.1
Reviewed-by: Shradha Gupta <shradhagu...@linux.microsoft.com>