Am 2020-08-28 um 1:53 p.m. schrieb Mukul Joshi:
> Add support for reporting GPU reset events through SMI. KFD
> would report both pre and post GPU reset events.
>
> Signed-off-by: Mukul Joshi <mukul.jo...@amd.com>

Minor coding-style nit-picks inline. With those fixed, this patch is

Reviewed-by: Felix Kuehling <felix.kuehl...@amd.com>

And I noticed a coding style issue from a previous commit that could be
cleaned up in a follow up. See inline.


> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_device.c     |  5 +++
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h       |  2 ++
>  drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 35 +++++++++++++++++++--
>  drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h |  1 +
>  include/uapi/linux/kfd_ioctl.h              |  2 ++
>  5 files changed, 43 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index e1cd6599529f..f5e1b3aaa10c 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -812,6 +812,8 @@ int kgd2kfd_pre_reset(struct kfd_dev *kfd)
>       if (!kfd->init_complete)
>               return 0;
>  
> +     kfd_smi_event_update_gpu_reset(kfd, false);
> +
>       kfd->dqm->ops.pre_reset(kfd->dqm);
>  
>       kgd2kfd_suspend(kfd, false);
> @@ -833,6 +835,7 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd)
>       if (!kfd->init_complete)
>               return 0;
>  
> +

Unnecessary white-space change.


>       ret = kfd_resume(kfd);
>       if (ret)
>               return ret;
> @@ -840,6 +843,8 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd)
>  
>       atomic_set(&kfd->sram_ecc_flag, 0);
>  
> +     kfd_smi_event_update_gpu_reset(kfd, true);
> +
>       return 0;
>  }
>  
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 18bc711f97ae..7e8767934748 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -312,6 +312,8 @@ struct kfd_dev {
>       /* Clients watching SMI events */
>       struct list_head smi_clients;
>       spinlock_t smi_lock;
> +
> +     uint32_t reset_seq_num;
>  };
>  
>  enum kfd_mempool {
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
> index 4d4b6e3ab697..001cacb09467 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
> @@ -174,6 +174,37 @@ static void add_event_to_kfifo(struct kfd_dev *dev, 
> unsigned int smi_event,
>       rcu_read_unlock();
>  }
>  
> +void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset)
> +{
> +     /*
> +      * GpuReset msg = Reset seq number (incremented for
> +      * every reset message sent before GPU reset).
> +      * 1 byte event + 1 byte space + 8 bytes seq num +
> +      * 1 byte \n + 1 byte \0 = 12
> +      */
> +     char fifo_in[12];
> +     int len;
> +     unsigned int event;
> +
> +     if (list_empty(&dev->smi_clients)) {
> +             return;
> +     }

You don't need {} for a single statement.


> +
> +     memset(fifo_in, 0x0, sizeof(fifo_in));
> +
> +     if (post_reset) {

The {} are OK (and recommended) here for consistency with the else-branch.


> +             event = KFD_SMI_EVENT_GPU_POST_RESET;
> +     } else {
> +             event = KFD_SMI_EVENT_GPU_PRE_RESET;
> +             ++(dev->reset_seq_num);
> +     }
> +
> +     len = snprintf(fifo_in, sizeof(fifo_in), "%x %x\n", event,
> +                                             dev->reset_seq_num);
> +
> +     add_event_to_kfifo(dev, event, fifo_in, len);
> +}
> +
>  void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
>                                            uint32_t throttle_bitmask)
>  {
> @@ -191,7 +222,7 @@ void kfd_smi_event_update_thermal_throttling(struct 
> kfd_dev *dev,
>       if (list_empty(&dev->smi_clients))
>               return;
>  
> -     len = snprintf(fifo_in, 29, "%x %x:%llx\n",
> +     len = snprintf(fifo_in, sizeof(fifo_in), "%x %x:%llx\n",
>                      KFD_SMI_EVENT_THERMAL_THROTTLE, throttle_bitmask,
>                      atomic64_read(&adev->smu.throttle_int_counter));
>  
> @@ -218,7 +249,7 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, 
> uint16_t pasid)
>       if (!task_info.pid)
>               return;
>  
> -     len = snprintf(fifo_in, 29, "%x %x:%s\n", KFD_SMI_EVENT_VMFAULT,
> +     len = snprintf(fifo_in, sizeof(fifo_in), "%x %x:%s\n", 
> KFD_SMI_EVENT_VMFAULT,
>               task_info.pid, task_info.task_name);
>  
>       add_event_to_kfifo(dev, KFD_SMI_EVENT_VMFAULT, fifo_in, len);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h 
> b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
> index 15537b2cccb5..b9b0438202e2 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
> @@ -27,5 +27,6 @@ int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd);
>  void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid);
>  void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
>                                            uint32_t throttle_bitmask);
> +void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset);
>  
>  #endif
> diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
> index cb1f963a84e0..8b7368bfbd84 100644
> --- a/include/uapi/linux/kfd_ioctl.h
> +++ b/include/uapi/linux/kfd_ioctl.h
> @@ -453,6 +453,8 @@ enum kfd_smi_event {
>          KFD_SMI_EVENT_NONE = 0, /* not used */
>          KFD_SMI_EVENT_VMFAULT = 1, /* event start counting at 1 */
>          KFD_SMI_EVENT_THERMAL_THROTTLE = 2,

Looks like a previous commit didn't use TABs for indentation here. Can
you make a fix for that?

Thanks,
  Felix


> +     KFD_SMI_EVENT_GPU_PRE_RESET = 3,
> +     KFD_SMI_EVENT_GPU_POST_RESET = 4,
>  };
>  
>  #define KFD_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1))
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to