From: "jesse.zh...@amd.com" <jesse.zh...@amd.com>

This patch introduces two new functions, `amdgpu_sdma_stop_queue` and
`amdgpu_sdma_start_queue`, to handle the stopping and starting of SDMA queues
during engine reset operations. The changes include:

1. **New Functions**:
   - `amdgpu_sdma_stop_queue`: Stops the SDMA queues and the scheduler's work 
queue
     for the GFX and page rings.
   - `amdgpu_sdma_start_queue`: Starts the SDMA queues and restarts the 
scheduler's
     work queue for the GFX and page rings.

2. **Integration with Ring Functions**:
   - The `stop_queue` and `start_queue` callbacks are added to the 
`amdgpu_ring_funcs`
     structure and implemented for SDMA v4.4.2.

Suggested-by: Jonathan Kim <jonathan....@amd.com>
Signed-off-by: Jesse Zhang <jesse.zh...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 92 ++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h |  2 +
 4 files changed, 97 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index d55c8b7fdb59..ff9aacbdf046 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -351,6 +351,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct 
amdgpu_ring *ring,
                0xffffffffffffffff : ring->buf_mask;
        /*  Initialize cached_rptr to 0 */
        ring->cached_rptr = 0;
+       atomic_set(&ring->stop_refcount, 0);
 
        /* Allocate ring buffer */
        if (ring->is_mes_queue) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 1c52ff92ea26..7a984dbb48c7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -312,6 +312,8 @@ struct amdgpu_ring {
        unsigned int    entry_index;
        /* store the cached rptr to restore after reset */
        uint64_t cached_rptr;
+       /* Reference counter for stop requests */
+       atomic_t stop_refcount;
 
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index 39669f8788a7..7cd6dcd6e7f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -30,6 +30,7 @@
 #define AMDGPU_CSA_SDMA_SIZE 64
 /* SDMA CSA reside in the 3rd page of CSA */
 #define AMDGPU_CSA_SDMA_OFFSET (4096 * 2)
+/* Serializes SDMA queue stop/start requests; used only in this file, so keep it static */
+static DEFINE_MUTEX(sdma_queue_mutex);
 
 /*
  * GPU SDMA IP block helpers function.
@@ -504,6 +505,97 @@ void amdgpu_sdma_sysfs_reset_mask_fini(struct 
amdgpu_device *adev)
        }
 }
 
+/**
+ * amdgpu_sdma_stop_queue - stop the SDMA queues of one engine instance
+ * @adev: amdgpu device pointer
+ * @instance_id: index of the SDMA engine instance to stop
+ *
+ * Parks the scheduler work queues of the GFX ring (and the page ring, when
+ * the ASIC has one) and then quiesces the HW queues through the ring's
+ * stop_queue callback. Stop requests are reference counted: only the first
+ * request performs the actual stop; nested requests keep their reference
+ * and return -EBUSY.
+ *
+ * Returns 0 on success, -EBUSY when the queues are already stopped, or the
+ * error from down_read_killable() if interrupted during GPU reset.
+ */
+int amdgpu_sdma_stop_queue(struct amdgpu_device *adev, uint32_t instance_id)
+{
+	struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id];
+	struct amdgpu_ring *gfx_ring = &sdma_instance->ring;
+	struct amdgpu_ring *page_ring = &sdma_instance->page;
+	int r;
+
+	mutex_lock(&sdma_queue_mutex);
+
+	/* Avoid accidentally unparking the sched thread during GPU reset */
+	r = down_read_killable(&adev->reset_domain->sem);
+	if (r)
+		goto exit;
+
+	/* Take a stop reference for each ring on this instance */
+	atomic_inc(&gfx_ring->stop_refcount);
+	if (adev->sdma.has_page_queue)
+		atomic_inc(&page_ring->stop_refcount);
+
+	/* Only the first stop request actually stops the queues */
+	if (atomic_read(&gfx_ring->stop_refcount) != 1 ||
+	    (adev->sdma.has_page_queue && atomic_read(&page_ring->stop_refcount) != 1)) {
+		up_read(&adev->reset_domain->sem);
+		r = -EBUSY;
+		goto exit;
+	}
+
+	/*
+	 * Park the scheduler work queues while the HW queues are down.
+	 * The wqueue must be stopped when the scheduler IS ready; the
+	 * previous inverted check skipped the stop on every active ring.
+	 */
+	if (amdgpu_ring_sched_ready(gfx_ring))
+		drm_sched_wqueue_stop(&gfx_ring->sched);
+
+	if (adev->sdma.has_page_queue && amdgpu_ring_sched_ready(page_ring))
+		drm_sched_wqueue_stop(&page_ring->sched);
+
+	/* Quiesce the HW queues through the IP-specific callbacks */
+	if (gfx_ring->funcs && gfx_ring->funcs->stop_queue)
+		gfx_ring->funcs->stop_queue(adev, instance_id);
+
+	if (adev->sdma.has_page_queue && page_ring->funcs && page_ring->funcs->stop_queue)
+		page_ring->funcs->stop_queue(adev, instance_id);
+
+	up_read(&adev->reset_domain->sem);
+
+exit:
+	mutex_unlock(&sdma_queue_mutex);
+	return r;
+}
+
+/**
+ * amdgpu_sdma_start_queue - restart the SDMA queues of one engine instance
+ * @adev: amdgpu device pointer
+ * @instance_id: index of the SDMA engine instance to restart
+ *
+ * Restarts the HW queues through the ring's start_queue callback and then
+ * unparks the scheduler work queues of the GFX ring (and the page ring,
+ * when the ASIC has one). Start requests are reference counted to pair
+ * with amdgpu_sdma_stop_queue(): only the last outstanding request
+ * performs the actual restart; earlier ones return -EBUSY.
+ *
+ * Returns 0 on success, -EBUSY while other stop references remain, or the
+ * error from down_read_killable() if interrupted during GPU reset.
+ */
+int amdgpu_sdma_start_queue(struct amdgpu_device *adev, uint32_t instance_id)
+{
+	struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id];
+	struct amdgpu_ring *gfx_ring = &sdma_instance->ring;
+	struct amdgpu_ring *page_ring = &sdma_instance->page;
+	int r;
+
+	mutex_lock(&sdma_queue_mutex);
+
+	/* Avoid accidentally unparking the sched thread during GPU reset */
+	r = down_read_killable(&adev->reset_domain->sem);
+	if (r)
+		goto exit;
+
+	/* Drop one stop reference for each ring on this instance */
+	atomic_dec(&gfx_ring->stop_refcount);
+	if (adev->sdma.has_page_queue)
+		atomic_dec(&page_ring->stop_refcount);
+
+	/* Only the last start request actually restarts the queues */
+	if (atomic_read(&gfx_ring->stop_refcount) != 0 ||
+	    (adev->sdma.has_page_queue && atomic_read(&page_ring->stop_refcount) != 0)) {
+		up_read(&adev->reset_domain->sem);
+		r = -EBUSY;
+		goto exit;
+	}
+
+	/* Bring the HW queues back up through the IP-specific callbacks */
+	if (gfx_ring->funcs && gfx_ring->funcs->start_queue)
+		gfx_ring->funcs->start_queue(adev, instance_id);
+
+	if (adev->sdma.has_page_queue && page_ring->funcs && page_ring->funcs->start_queue)
+		page_ring->funcs->start_queue(adev, instance_id);
+
+	/* Restart the scheduler's work queue for the GFX and page rings */
+	if (amdgpu_ring_sched_ready(gfx_ring))
+		drm_sched_wqueue_start(&gfx_ring->sched);
+
+	/*
+	 * Guard the page ring like everywhere else in this file:
+	 * &sdma_instance->page is not initialized when the ASIC has no
+	 * page queue, so touching its sched here would be bogus.
+	 */
+	if (adev->sdma.has_page_queue && amdgpu_ring_sched_ready(page_ring))
+		drm_sched_wqueue_start(&page_ring->sched);
+
+	up_read(&adev->reset_domain->sem);
+
+exit:
+	mutex_unlock(&sdma_queue_mutex);
+	return r;
+}
+
 /**
  * amdgpu_sdma_register_on_reset_callbacks - Register SDMA reset callbacks
  * @funcs: Pointer to the callback structure containing pre_reset and 
post_reset functions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index 965169320065..a91791fa3ecf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -170,6 +170,8 @@ struct amdgpu_buffer_funcs {
 
 void amdgpu_sdma_register_on_reset_callbacks(struct amdgpu_device *adev, 
struct sdma_on_reset_funcs *funcs);
 int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id, 
bool suspend_user_queues);
+int amdgpu_sdma_stop_queue(struct amdgpu_device *adev, uint32_t instance_id);
+int amdgpu_sdma_start_queue(struct amdgpu_device *adev, uint32_t instance_id);
 
 #define amdgpu_emit_copy_buffer(adev, ib, s, d, b, t) 
(adev)->mman.buffer_funcs->emit_copy_buffer((ib),  (s), (d), (b), (t))
 #define amdgpu_emit_fill_buffer(adev, ib, s, d, b) 
(adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b))
-- 
2.25.1

Reply via email to