Dequeue retry timeout controls the interval between checks for unmet
conditions. On MI series, reduce this from 0x40 to 0x1 (~ 1 uS). The
cost of additional bandwidth consumed by CP when polling memory
shouldn't be substantial.

Signed-off-by: Harish Kasiviswanathan <harish.kasiviswanat...@amd.com>
---
 .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c  |  2 +-
 .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   |  2 +-
 .../drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c   |  4 +--
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c    | 28 ++++++++---------
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h    |  5 +--
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c  |  2 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 28 ++++++++---------
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h |  5 +--
 .../drm/amd/amdkfd/kfd_packet_manager_v9.c    | 31 ++++++++++++++-----
 .../gpu/drm/amd/include/kgd_kfd_interface.h   |  5 +--
 10 files changed, 65 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
index 8dfdb18197c4..53f5f1885870 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
@@ -189,7 +189,7 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
        .set_address_watch = kgd_gfx_aldebaran_set_address_watch,
        .clear_address_watch = kgd_gfx_v9_clear_address_watch,
        .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
-       .build_grace_period_packet_info = 
kgd_gfx_v9_build_grace_period_packet_info,
+       .build_dequeue_wait_counts_packet_info = 
kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
        .program_trap_handler_settings = 
kgd_gfx_v9_program_trap_handler_settings,
        .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
        .hqd_reset = kgd_gfx_v9_hqd_reset,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index 9abf29b58ac7..6fd41aece7e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -415,7 +415,7 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
        .set_address_watch = kgd_gfx_v9_set_address_watch,
        .clear_address_watch = kgd_gfx_v9_clear_address_watch,
        .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
-       .build_grace_period_packet_info = 
kgd_gfx_v9_build_grace_period_packet_info,
+       .build_dequeue_wait_counts_packet_info = 
kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
        .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
        .program_trap_handler_settings = 
kgd_gfx_v9_program_trap_handler_settings,
        .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
index e2ae714a700f..95f249896275 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
@@ -530,8 +530,8 @@ const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = {
        .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
        .program_trap_handler_settings =
                                kgd_gfx_v9_program_trap_handler_settings,
-       .build_grace_period_packet_info =
-                               kgd_gfx_v9_build_grace_period_packet_info,
+       .build_dequeue_wait_counts_packet_info =
+                               
kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
        .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
        .enable_debug_trap = kgd_aldebaran_enable_debug_trap,
        .disable_debug_trap = kgd_gfx_v9_4_3_disable_debug_trap,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 62176d607bef..0b03f2e9a858 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -1021,25 +1021,25 @@ void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device 
*adev,
        *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2));
 }
 
-void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
+void kgd_gfx_v10_build_dequeue_wait_counts_packet_info(struct amdgpu_device 
*adev,
                                                uint32_t wait_times,
-                                               uint32_t grace_period,
+                                               uint32_t sch_wave,
+                                               uint32_t que_sleep,
                                                uint32_t *reg_offset,
                                                uint32_t *reg_data)
 {
        *reg_data = wait_times;
 
-       /*
-        * The CP cannont handle a 0 grace period input and will result in
-        * an infinite grace period being set so set to 1 to prevent this.
-        */
-       if (grace_period == 0)
-               grace_period = 1;
-
-       *reg_data = REG_SET_FIELD(*reg_data,
-                       CP_IQ_WAIT_TIME2,
-                       SCH_WAVE,
-                       grace_period);
+       if (sch_wave)
+               *reg_data = REG_SET_FIELD(*reg_data,
+                               CP_IQ_WAIT_TIME2,
+                               SCH_WAVE,
+                               sch_wave);
+       if (que_sleep)
+               *reg_data = REG_SET_FIELD(*reg_data,
+                               CP_IQ_WAIT_TIME2,
+                               QUE_SLEEP,
+                               que_sleep);
 
        *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
 }
@@ -1109,7 +1109,7 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
        .set_address_watch = kgd_gfx_v10_set_address_watch,
        .clear_address_watch = kgd_gfx_v10_clear_address_watch,
        .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
-       .build_grace_period_packet_info = 
kgd_gfx_v10_build_grace_period_packet_info,
+       .build_dequeue_wait_counts_packet_info = 
kgd_gfx_v10_build_dequeue_wait_counts_packet_info,
        .program_trap_handler_settings = program_trap_handler_settings,
        .hqd_get_pq_addr = kgd_gfx_v10_hqd_get_pq_addr,
        .hqd_reset = kgd_gfx_v10_hqd_reset
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
index 9efd2dd4fdd7..89ae07288b10 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
@@ -51,9 +51,10 @@ uint32_t kgd_gfx_v10_clear_address_watch(struct 
amdgpu_device *adev,
 void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev,
                                uint32_t *wait_times,
                                uint32_t inst);
-void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
+void kgd_gfx_v10_build_dequeue_wait_counts_packet_info(struct amdgpu_device 
*adev,
                                               uint32_t wait_times,
-                                              uint32_t grace_period,
+                                              uint32_t sch_wave,
+                                              uint32_t que_sleep,
                                               uint32_t *reg_offset,
                                               uint32_t *reg_data);
 uint64_t kgd_gfx_v10_hqd_get_pq_addr(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
index c718bedda0ca..2c5f22838fe0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
@@ -673,7 +673,7 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
        .set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3,
        .program_trap_handler_settings = program_trap_handler_settings_v10_3,
        .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
-       .build_grace_period_packet_info = 
kgd_gfx_v10_build_grace_period_packet_info,
+       .build_dequeue_wait_counts_packet_info = 
kgd_gfx_v10_build_dequeue_wait_counts_packet_info,
        .enable_debug_trap = kgd_gfx_v10_enable_debug_trap,
        .disable_debug_trap = kgd_gfx_v10_disable_debug_trap,
        .validate_trap_override_request = 
kgd_gfx_v10_validate_trap_override_request,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 441568163e20..d2bbe9973c93 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -1077,25 +1077,25 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device 
*adev,
                                adev->gfx.cu_info.max_waves_per_simd;
 }
 
-void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
+void kgd_gfx_v9_build_dequeue_wait_counts_packet_info(struct amdgpu_device 
*adev,
                uint32_t wait_times,
-               uint32_t grace_period,
+               uint32_t sch_wave,
+               uint32_t que_sleep,
                uint32_t *reg_offset,
                uint32_t *reg_data)
 {
        *reg_data = wait_times;
 
-       /*
-        * The CP cannot handle a 0 grace period input and will result in
-        * an infinite grace period being set so set to 1 to prevent this.
-        */
-       if (grace_period == 0)
-               grace_period = 1;
-
-       *reg_data = REG_SET_FIELD(*reg_data,
-                       CP_IQ_WAIT_TIME2,
-                       SCH_WAVE,
-                       grace_period);
+       if (sch_wave)
+               *reg_data = REG_SET_FIELD(*reg_data,
+                               CP_IQ_WAIT_TIME2,
+                               SCH_WAVE,
+                               sch_wave);
+       if (que_sleep)
+               *reg_data = REG_SET_FIELD(*reg_data,
+                               CP_IQ_WAIT_TIME2,
+                               QUE_SLEEP,
+                               que_sleep);
 
        *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
 }
@@ -1254,7 +1254,7 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
        .set_address_watch = kgd_gfx_v9_set_address_watch,
        .clear_address_watch = kgd_gfx_v9_clear_address_watch,
        .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
-       .build_grace_period_packet_info = 
kgd_gfx_v9_build_grace_period_packet_info,
+       .build_dequeue_wait_counts_packet_info = 
kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
        .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
        .program_trap_handler_settings = 
kgd_gfx_v9_program_trap_handler_settings,
        .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
index b6a91a552aa4..54ee8992be5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
@@ -97,9 +97,10 @@ uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device 
*adev,
 void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev,
                                uint32_t *wait_times,
                                uint32_t inst);
-void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
+void kgd_gfx_v9_build_dequeue_wait_counts_packet_info(struct amdgpu_device 
*adev,
                                               uint32_t wait_times,
-                                              uint32_t grace_period,
+                                              uint32_t sch_wave,
+                                              uint32_t que_sleep,
                                               uint32_t *reg_offset,
                                               uint32_t *reg_data);
 uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
index 9cb21af1d0af..f9c302732773 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
@@ -298,13 +298,14 @@ static int pm_map_queues_v9(struct packet_manager *pm, 
uint32_t *buffer,
 }
 
 static inline void pm_build_dequeue_wait_counts_packet_info(struct 
packet_manager *pm,
-                       uint32_t sch_value, uint32_t *reg_offset,
+                       uint32_t sch_value, uint32_t que_sleep, uint32_t 
*reg_offset,
                        uint32_t *reg_data)
 {
-       pm->dqm->dev->kfd2kgd->build_grace_period_packet_info(
+       pm->dqm->dev->kfd2kgd->build_dequeue_wait_counts_packet_info(
                pm->dqm->dev->adev,
                pm->dqm->wait_times,
                sch_value,
+               que_sleep,
                reg_offset,
                reg_data);
 }
@@ -320,26 +321,40 @@ static int pm_config_dequeue_wait_counts_v9(struct 
packet_manager *pm,
 
        switch (cmd) {
        case KFD_DEQUEUE_WAIT_INIT:
+               uint32_t sch_wave = 0, que_sleep = 0;
+               /* Reduce CP_IQ_WAIT_TIME2.QUE_SLEEP to 0x1 from default 0x40.
+                * On a 1GHz machine this is roughly 1 microsecond, which is
+                * about how long it takes to load data out of memory during
+                * queue connect
+                * QUE_SLEEP: Wait Count for Dequeue Retry.
+                */
+               if (KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(9, 4, 1) &&
+                   KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(10, 0, 0))
+                       que_sleep = 1;
+
                /* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */
                if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev->gmc.is_app_apu 
&&
                   (KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4, 3)))
-                       pm_build_dequeue_wait_counts_packet_info(pm, 1, 
&reg_offset, &reg_data);
-               else
-                       return 0;
+                       sch_wave = 1;
+
+               pm_build_dequeue_wait_counts_packet_info(pm, sch_wave, 
que_sleep,
+                       &reg_offset, &reg_data);
+
                break;
        case KFD_DEQUEUE_WAIT_RESET:
                /* function called only to get reg_offset */
-               pm_build_dequeue_wait_counts_packet_info(pm, 0, &reg_offset, 
&reg_data);
+               pm_build_dequeue_wait_counts_packet_info(pm, 0, 0, &reg_offset, 
&reg_data);
                reg_data = pm->dqm->wait_times;
                break;
 
        case KFD_DEQUEUE_WAIT_SET_SCH_WAVE:
                /* The CP cannot handle value 0 and it will result in
-                * an infinite grace period being set so set to 1 to prevent 
this.
+                * an infinite grace period being set so set to 1 to prevent 
this. Also
+                * avoid debugger API breakage as it sets 0 and expects a low 
value.
                 */
                if (!value)
                        value = 1;
-               pm_build_dequeue_wait_counts_packet_info(pm, value, 
&reg_offset, &reg_data);
+               pm_build_dequeue_wait_counts_packet_info(pm, value, 0, 
&reg_offset, &reg_data);
                break;
        }
 
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h 
b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index e3e635a31b8a..4f79e707bc80 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -313,9 +313,10 @@ struct kfd2kgd_calls {
        void (*get_iq_wait_times)(struct amdgpu_device *adev,
                        uint32_t *wait_times,
                        uint32_t inst);
-       void (*build_grace_period_packet_info)(struct amdgpu_device *adev,
+       void (*build_dequeue_wait_counts_packet_info)(struct amdgpu_device 
*adev,
                        uint32_t wait_times,
-                       uint32_t grace_period,
+                       uint32_t sch_wave,
+                       uint32_t que_sleep,
                        uint32_t *reg_offset,
                        uint32_t *reg_data);
        void (*get_cu_occupancy)(struct amdgpu_device *adev,
-- 
2.34.1

Reply via email to