[AMD Official Use Only - General]

-----------------
Best Regards,
Thomas

-----Original Message-----
From: Zhang, Hawking <hawking.zh...@amd.com>
Sent: Thursday, April 25, 2024 11:01 AM
To: Chai, Thomas <yipeng.c...@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Zhou1, Tao <tao.zh...@amd.com>; Li, Candice <candice...@amd.com>; Wang, 
Yang(Kevin) <kevinyang.w...@amd.com>; Yang, Stanley <stanley.y...@amd.com>
Subject: RE: [PATCH 11/15] drm/amdgpu: prepare to handle pasid poison 
consumption

[AMD Official Use Only - General]

+void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device 
*adev,
+                       enum amdgpu_ras_block block, uint16_t pasid,
+                       pasid_notify pasid_fn, void *data, uint32_t
+reset);

> So we ultimately switch to the above poison consumption handler for all the 
> existing v9 adapters, right? If so, we should be able to make this function 
> backwards compatible. I'm wondering if we can just change the existing 
> amdgpu_amdkfd_ras_poison_consumption_handler.

> Pasid_poison_consumption_handler is a little bit confusing.

[Thomas] No. Only when UMC_HWIP is greater than or equal to 
IP_VERSION(12, 0, 0) does it work on the new path. The IP check is in the 
amdgpu_umc_pasid_poison_handler function.



Regards,
Hawking

-----Original Message-----
From: Chai, Thomas <yipeng.c...@amd.com>
Sent: Thursday, April 18, 2024 10:59
To: amd-gfx@lists.freedesktop.org
Cc: Chai, Thomas <yipeng.c...@amd.com>; Zhang, Hawking <hawking.zh...@amd.com>; 
Zhou1, Tao <tao.zh...@amd.com>; Li, Candice <candice...@amd.com>; Wang, 
Yang(Kevin) <kevinyang.w...@amd.com>; Yang, Stanley <stanley.y...@amd.com>; 
Chai, Thomas <yipeng.c...@amd.com>
Subject: [PATCH 11/15] drm/amdgpu: prepare to handle pasid poison consumption

Prepare to handle pasid poison consumption.

Signed-off-by: YiPeng Chai <yipeng.c...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c    |  9 ++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  5 +++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c       | 20 ++++++++++++-------
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h       |  3 +++
 .../gpu/drm/amd/amdkfd/kfd_int_process_v9.c   |  3 ++-
 5 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 66753940bb4d..287ce431901c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -759,10 +759,17 @@ bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev)
        return amdgpu_ras_get_fed_status(adev);  }

+void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device 
*adev,
+                               enum amdgpu_ras_block block, uint16_t pasid,
+                               pasid_notify pasid_fn, void *data, uint32_t 
reset) {
+       amdgpu_umc_pasid_poison_handler(adev, block, pasid, pasid_fn,
+data, reset); }
+
 void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
        enum amdgpu_ras_block block, uint32_t reset)  {
-       amdgpu_umc_poison_handler(adev, block, reset);
+       amdgpu_umc_pasid_poison_handler(adev, block, 0, NULL, NULL,
+ reset);
 }

 int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev, diff 
--git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index ad50c7bbc326..54e15994d02b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -401,6 +401,11 @@ int amdgpu_amdkfd_get_tile_config(struct amdgpu_device 
*adev,
                                struct tile_config *config);  void 
amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
                        enum amdgpu_ras_block block, uint32_t reset);
+
+void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device 
*adev,
+                       enum amdgpu_ras_block block, uint16_t pasid,
+                       pasid_notify pasid_fn, void *data, uint32_t
+reset);
+
 bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev);  bool 
amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem 
*mem);  void amdgpu_amdkfd_block_mmu_notifications(void *p); diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index dcda3d24bee3..8ebbca9e2e22 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -252,8 +252,9 @@ int amdgpu_umc_bad_page_polling_timeout(struct 
amdgpu_device *adev,
        return 0;
 }

-int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
-                       enum amdgpu_ras_block block, uint32_t reset)
+int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev,
+                       enum amdgpu_ras_block block, uint16_t pasid,
+                       pasid_notify pasid_fn, void *data, uint32_t
+reset)
 {
        int ret = AMDGPU_RAS_SUCCESS;

@@ -291,16 +292,14 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev,

                        amdgpu_ras_error_data_fini(&err_data);
                } else {
-                       if (reset) {
-                               amdgpu_umc_bad_page_polling_timeout(adev,
-                                                       reset, 
MAX_UMC_POISON_POLLING_TIME_SYNC);
-                       } else {
                                struct amdgpu_ras *con = 
amdgpu_ras_get_context(adev);

+                               amdgpu_ras_put_poison_req(adev,
+                                       block, pasid, pasid_fn, data,
+ reset);
+
                                atomic_inc(&con->page_retirement_req_cnt);

                                wake_up(&con->page_retirement_wq);
-                       }
                }
        } else {
                if (adev->virt.ops && adev->virt.ops->ras_poison_handler)
@@ -313,6 +312,13 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
        return ret;
 }

+int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
+                       enum amdgpu_ras_block block, uint32_t reset) {
+       return amdgpu_umc_pasid_poison_handler(adev,
+                               block, 0, NULL, NULL, reset); }
+
 int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
                void *ras_error_status,
                struct amdgpu_iv_entry *entry) diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index 9e77e6d48e3b..5f50c69c3cec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -106,6 +106,9 @@ int amdgpu_umc_ras_sw_init(struct amdgpu_device *adev);  
int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if 
*ras_block);  int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
                        enum amdgpu_ras_block block, uint32_t reset);
+int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev,
+                       enum amdgpu_ras_block block, uint16_t pasid,
+                       pasid_notify pasid_fn, void *data, uint32_t
+reset);
 int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
                struct amdgpu_irq_src *source,
                struct amdgpu_iv_entry *entry); diff --git 
a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index c368c70df3f4..6bf4bbc3cffa 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -201,7 +201,8 @@ static void event_interrupt_poison_consumption_v9(struct 
kfd_node *dev,
                        "RAS poison consumption, fall back to gpu reset flow: 
client id %d\n",
                        client_id);

-       amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, block, reset);
+       amdgpu_amdkfd_ras_pasid_poison_consumption_handler(dev->adev,
+               block, pasid, NULL, NULL, reset);
 }

 static bool context_id_expected(struct kfd_dev *dev)
--
2.34.1


Reply via email to