Reviewed-by: Jacek Lawrynowicz <jacek.lawrynow...@linux.intel.com>

On 17.11.2023 18:43, Jeffrey Hugo wrote:
> From: Carl Vanderlip <quic_ca...@quicinc.com>
> 
> Currently the QAIC DRM device registers itself when the MHI QAIC_CONTROL
> channel becomes available. This is when the device is able to process
> workloads. However, the DRM driver also provides the debugfs interface
> bootlog for the device. If the device fails to boot to the QSM (which
> brings up the MHI QAIC_CONTROL channel), the bootlog won't be available for
> debugging why it failed to boot.
> 
> Change when the DRM device registers itself from when QAIC_CONTROL is
> available to when the card is first probed on the PCI bus. Additionally,
> make the DRM driver persist through reset/error cases so the driver
> doesn't have to be reloaded to access the card again. Send
> KOBJ_ONLINE/OFFLINE uevents so userspace can know when DRM device is
> ready to handle requests.
> 
> Signed-off-by: Carl Vanderlip <quic_ca...@quicinc.com>
> Reviewed-by: Pranjal Ramajor Asha Kanojiya <quic_pkano...@quicinc.com>
> Reviewed-by: Jeffrey Hugo <quic_jh...@quicinc.com>
> Signed-off-by: Jeffrey Hugo <quic_jh...@quicinc.com>
> ---
>  Documentation/accel/qaic/qaic.rst   |  9 +++++-
>  drivers/accel/qaic/mhi_controller.c |  2 +-
>  drivers/accel/qaic/qaic.h           |  2 +-
>  drivers/accel/qaic/qaic_drv.c       | 44 +++++++++++------------------
>  4 files changed, 27 insertions(+), 30 deletions(-)
> 
> diff --git a/Documentation/accel/qaic/qaic.rst 
> b/Documentation/accel/qaic/qaic.rst
> index f81020736ebf..efb7771273bb 100644
> --- a/Documentation/accel/qaic/qaic.rst
> +++ b/Documentation/accel/qaic/qaic.rst
> @@ -93,8 +93,15 @@ commands (does not impact QAIC).
>  uAPI
>  ====
>  
> +QAIC creates an accel device per phsyical PCIe device. This accel device 
> exists
> +for as long as the PCIe device is known to Linux.
> +
> +The PCIe device may not be in the state to accept requests from userspace at
> +all times. QAIC will trigger KOBJ_ONLINE/OFFLINE uevents to advertise when 
> the
> +device can accept requests (ONLINE) and when the device is no longer 
> accepting
> +requests (OFFLINE) because of a reset or other state transition.
> +
>  QAIC defines a number of driver specific IOCTLs as part of the userspace API.
> -This section describes those APIs.
>  
>  DRM_IOCTL_QAIC_MANAGE
>    This IOCTL allows userspace to send a NNC request to the QSM. The call will
> diff --git a/drivers/accel/qaic/mhi_controller.c 
> b/drivers/accel/qaic/mhi_controller.c
> index 5d3cc30009cc..832464f2833a 100644
> --- a/drivers/accel/qaic/mhi_controller.c
> +++ b/drivers/accel/qaic/mhi_controller.c
> @@ -469,7 +469,7 @@ static void mhi_status_cb(struct mhi_controller 
> *mhi_cntrl, enum mhi_callback re
>               pci_err(qdev->pdev, "Fatal error received from device. 
> Attempting to recover\n");
>       /* this event occurs in non-atomic context */
>       if (reason == MHI_CB_SYS_ERROR)
> -             qaic_dev_reset_clean_local_state(qdev, true);
> +             qaic_dev_reset_clean_local_state(qdev);
>  }
>  
>  static int mhi_reset_and_async_power_up(struct mhi_controller *mhi_cntrl)
> diff --git a/drivers/accel/qaic/qaic.h b/drivers/accel/qaic/qaic.h
> index bd0c884e6bf7..66f4abf6c4c4 100644
> --- a/drivers/accel/qaic/qaic.h
> +++ b/drivers/accel/qaic/qaic.h
> @@ -283,7 +283,7 @@ void wakeup_dbc(struct qaic_device *qdev, u32 dbc_id);
>  void release_dbc(struct qaic_device *qdev, u32 dbc_id);
>  
>  void wake_all_cntl(struct qaic_device *qdev);
> -void qaic_dev_reset_clean_local_state(struct qaic_device *qdev, bool 
> exit_reset);
> +void qaic_dev_reset_clean_local_state(struct qaic_device *qdev);
>  
>  struct drm_gem_object *qaic_gem_prime_import(struct drm_device *dev, struct 
> dma_buf *dma_buf);
>  
> diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c
> index 02fe23248da4..c19bc83b249c 100644
> --- a/drivers/accel/qaic/qaic_drv.c
> +++ b/drivers/accel/qaic/qaic_drv.c
> @@ -8,6 +8,7 @@
>  #include <linux/idr.h>
>  #include <linux/interrupt.h>
>  #include <linux/list.h>
> +#include <linux/kobject.h>
>  #include <linux/kref.h>
>  #include <linux/mhi.h>
>  #include <linux/module.h>
> @@ -43,9 +44,6 @@ MODULE_PARM_DESC(datapath_polling, "Operate the datapath in 
> polling mode");
>  static bool link_up;
>  static DEFINE_IDA(qaic_usrs);
>  
> -static int qaic_create_drm_device(struct qaic_device *qdev, s32 
> partition_id);
> -static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 
> partition_id);
> -
>  static void free_usr(struct kref *kref)
>  {
>       struct qaic_user *usr = container_of(kref, struct qaic_user, ref_count);
> @@ -183,13 +181,6 @@ static int qaic_create_drm_device(struct qaic_device 
> *qdev, s32 partition_id)
>  
>       qddev->partition_id = partition_id;
>  
> -     /*
> -      * drm_dev_unregister() sets the driver data to NULL and
> -      * drm_dev_register() does not update the driver data. During a SOC
> -      * reset drm dev is unregistered and registered again leaving the
> -      * driver data to NULL.
> -      */
> -     dev_set_drvdata(to_accel_kdev(qddev), drm->accel);
>       ret = drm_dev_register(drm, 0);
>       if (ret)
>               pci_dbg(qdev->pdev, "drm_dev_register failed %d\n", ret);
> @@ -203,7 +194,6 @@ static void qaic_destroy_drm_device(struct qaic_device 
> *qdev, s32 partition_id)
>       struct drm_device *drm = to_drm(qddev);
>       struct qaic_user *usr;
>  
> -     drm_dev_get(drm);
>       drm_dev_unregister(drm);
>       qddev->partition_id = 0;
>       /*
> @@ -232,7 +222,6 @@ static void qaic_destroy_drm_device(struct qaic_device 
> *qdev, s32 partition_id)
>               mutex_lock(&qddev->users_mutex);
>       }
>       mutex_unlock(&qddev->users_mutex);
> -     drm_dev_put(drm);
>  }
>  
>  static int qaic_mhi_probe(struct mhi_device *mhi_dev, const struct 
> mhi_device_id *id)
> @@ -254,8 +243,6 @@ static int qaic_mhi_probe(struct mhi_device *mhi_dev, 
> const struct mhi_device_id
>  
>       qdev = pci_get_drvdata(to_pci_dev(mhi_dev->mhi_cntrl->cntrl_dev));
>  
> -     qdev->reset_state = QAIC_ONLINE;
> -
>       dev_set_drvdata(&mhi_dev->dev, qdev);
>       qdev->cntl_ch = mhi_dev;
>  
> @@ -265,6 +252,7 @@ static int qaic_mhi_probe(struct mhi_device *mhi_dev, 
> const struct mhi_device_id
>               return ret;
>       }
>  
> +     qdev->reset_state = QAIC_BOOT;
>       ret = get_cntl_version(qdev, NULL, &major, &minor);
>       if (ret || major != CNTL_MAJOR || minor > CNTL_MINOR) {
>               pci_err(qdev->pdev, "%s: Control protocol version (%d.%d) not 
> supported. Supported version is (%d.%d). Ret: %d\n",
> @@ -272,8 +260,8 @@ static int qaic_mhi_probe(struct mhi_device *mhi_dev, 
> const struct mhi_device_id
>               ret = -EINVAL;
>               goto close_control;
>       }
> -
> -     ret = qaic_create_drm_device(qdev, QAIC_NO_PARTITION);
> +     qdev->reset_state = QAIC_ONLINE;
> +     kobject_uevent(&(to_accel_kdev(qdev->qddev))->kobj, KOBJ_ONLINE);
>  
>       return ret;
>  
> @@ -291,6 +279,7 @@ static void qaic_notify_reset(struct qaic_device *qdev)
>  {
>       int i;
>  
> +     kobject_uevent(&(to_accel_kdev(qdev->qddev))->kobj, KOBJ_OFFLINE);
>       qdev->reset_state = QAIC_OFFLINE;
>       /* wake up any waiters to avoid waiting for timeouts at sync */
>       wake_all_cntl(qdev);
> @@ -299,21 +288,15 @@ static void qaic_notify_reset(struct qaic_device *qdev)
>       synchronize_srcu(&qdev->dev_lock);
>  }
>  
> -void qaic_dev_reset_clean_local_state(struct qaic_device *qdev, bool 
> exit_reset)
> +void qaic_dev_reset_clean_local_state(struct qaic_device *qdev)
>  {
>       int i;
>  
>       qaic_notify_reset(qdev);
>  
> -     /* remove drmdevs to prevent new users from coming in */
> -     qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION);
> -
>       /* start tearing things down */
>       for (i = 0; i < qdev->num_dbc; ++i)
>               release_dbc(qdev, i);
> -
> -     if (exit_reset)
> -             qdev->reset_state = QAIC_ONLINE;
>  }
>  
>  static void cleanup_qdev(struct qaic_device *qdev)
> @@ -338,6 +321,7 @@ static struct qaic_device *create_qdev(struct pci_dev 
> *pdev, const struct pci_de
>       if (!qdev)
>               return NULL;
>  
> +     qdev->reset_state = QAIC_OFFLINE;
>       if (id->device == PCI_DEV_AIC100) {
>               qdev->num_dbc = 16;
>               qdev->dbc = devm_kcalloc(&pdev->dev, qdev->num_dbc, 
> sizeof(*qdev->dbc), GFP_KERNEL);
> @@ -499,15 +483,21 @@ static int qaic_pci_probe(struct pci_dev *pdev, const 
> struct pci_device_id *id)
>               goto cleanup_qdev;
>       }
>  
> +     ret = qaic_create_drm_device(qdev, QAIC_NO_PARTITION);
> +     if (ret)
> +             goto cleanup_qdev;
> +
>       qdev->mhi_cntrl = qaic_mhi_register_controller(pdev, qdev->bar_0, 
> mhi_irq,
>                                                      qdev->single_msi);
>       if (IS_ERR(qdev->mhi_cntrl)) {
>               ret = PTR_ERR(qdev->mhi_cntrl);
> -             goto cleanup_qdev;
> +             goto cleanup_drm_dev;
>       }
>  
>       return 0;
>  
> +cleanup_drm_dev:
> +     qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION);
>  cleanup_qdev:
>       cleanup_qdev(qdev);
>       return ret;
> @@ -520,7 +510,8 @@ static void qaic_pci_remove(struct pci_dev *pdev)
>       if (!qdev)
>               return;
>  
> -     qaic_dev_reset_clean_local_state(qdev, false);
> +     qaic_dev_reset_clean_local_state(qdev);
> +     qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION);
>       qaic_mhi_free_controller(qdev->mhi_cntrl, link_up);
>       cleanup_qdev(qdev);
>  }
> @@ -543,14 +534,13 @@ static void qaic_pci_reset_prepare(struct pci_dev *pdev)
>  
>       qaic_notify_reset(qdev);
>       qaic_mhi_start_reset(qdev->mhi_cntrl);
> -     qaic_dev_reset_clean_local_state(qdev, false);
> +     qaic_dev_reset_clean_local_state(qdev);
>  }
>  
>  static void qaic_pci_reset_done(struct pci_dev *pdev)
>  {
>       struct qaic_device *qdev = pci_get_drvdata(pdev);
>  
> -     qdev->reset_state = QAIC_ONLINE;
>       qaic_mhi_reset_done(qdev->mhi_cntrl);
>  }
>  

Reply via email to