[PATCH] drm/amdkfd: Add metadata ring buffer for compute

Alex Deucher Tue, 16 Dec 2025 13:04:24 -0800

From: David Yat Sin <[email protected]>

Add support for a separate ring buffer for metadata packets when using
compute queues. The userspace application allocates the metadata ring
buffer and the queue ring buffer with a single allocation. The metadata
ring buffer starts after the queue ring buffer.


Signed-off-by: David Yat Sin <[email protected]>
Reviewed-by: Philip Yang <[email protected]>
Signed-off-by: Alex Deucher <[email protected]>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      |  8 +++++++
 .../drm/amd/amdkfd/kfd_mqd_manager_v12_1.c    | 21 +++++++++++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |  3 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_queue.c        |  7 +++++--
 include/uapi/linux/kfd_ioctl.h                |  5 +++--
 5 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 041237861107f..88621cb7d4098 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -221,6 +221,11 @@ static int set_queue_properties_from_user(struct 
queue_properties *q_properties,
                pr_debug("Size lower. clamped to KFD_MIN_QUEUE_RING_SIZE");
        }
 
+       if ((args->metadata_ring_size != 0) && 
!is_power_of_2(args->metadata_ring_size)) {
+               pr_err("Metadata ring size must be a power of 2 or 0\n");
+               return -EINVAL;
+       }
+
        if (!access_ok((const void __user *) args->read_pointer_address,
                        sizeof(uint32_t))) {
                pr_err("Can't access read pointer\n");
@@ -255,6 +260,9 @@ static int set_queue_properties_from_user(struct 
queue_properties *q_properties,
        q_properties->priority = args->queue_priority;
        q_properties->queue_address = args->ring_base_address;
        q_properties->queue_size = args->ring_size;
+       if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
+               q_properties->metadata_queue_size = args->metadata_ring_size;
+
        q_properties->read_ptr = (void __user *)args->read_pointer_address;
        q_properties->write_ptr = (void __user *)args->write_pointer_address;
        q_properties->eop_ring_buffer_address = args->eop_buffer_address;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12_1.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12_1.c
index f1c2c9e8cf6bb..a06b4e89af8a2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12_1.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12_1.c
@@ -266,6 +266,27 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
        m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
        m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
 
+       if (q->metadata_queue_size) {
+               /* On GC 12.1 a metadata packet is 64 DWs, which is 4 times the size of an AQL packet */
+               if (q->metadata_queue_size == q->queue_size * 4) {
+                       /*
+                        * User application allocates main queue ring and 
metadata queue ring
+                        * with a single allocation. metadata queue ring starts 
after main
+                        * queue ring.
+                        */
+                       m->cp_hqd_kd_base =
+                               lower_32_bits((q->queue_address + 
q->queue_size) >> 8);
+                       m->cp_hqd_kd_base_hi =
+                               upper_32_bits((q->queue_address + 
q->queue_size) >> 8);
+
+                       m->cp_hqd_kd_cntl |= 
CP_HQD_KD_CNTL__KD_FETCHER_ENABLE_MASK;
+                       /* KD_SIZE = 2 for metadata packet = 64 DWs */
+                       m->cp_hqd_kd_cntl |= 2 << 
CP_HQD_KD_CNTL__KD_SIZE__SHIFT;
+               } else {
+                       pr_warn("Invalid metadata ring size, metadata queue 
will be ignored\n");
+               }
+       }
+
        m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
        m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
        m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index ebc637c38c04a..d798baa7e52e4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -506,7 +506,8 @@ struct queue_properties {
        enum kfd_queue_format format;
        unsigned int queue_id;
        uint64_t queue_address;
-       uint64_t  queue_size;
+       uint64_t queue_size;
+       uint64_t metadata_queue_size;
        uint32_t priority;
        uint32_t queue_percent;
        void __user *read_ptr;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
index 56c97189e7f12..1b465fdb2c645 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
@@ -247,9 +247,12 @@ int kfd_queue_acquire_buffers(struct kfd_process_device 
*pdd, struct queue_prope
            properties->format == KFD_QUEUE_FORMAT_AQL &&
            topo_dev->node_props.gfx_target_version >= 70000 &&
            topo_dev->node_props.gfx_target_version < 90000)
-               expected_queue_size = properties->queue_size / 2;
+               /* metadata_queue_size not supported on GFX7/GFX8 */
+               expected_queue_size =
+                       properties->queue_size / 2;
        else
-               expected_queue_size = properties->queue_size;
+               expected_queue_size =
+                       properties->queue_size + 
properties->metadata_queue_size;
 
        vm = drm_priv_to_vm(pdd->drm_priv);
        err = amdgpu_bo_reserve(vm->root.bo, false);
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 6e91875c10baa..047bcb1cc0789 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -47,9 +47,10 @@
  * - 1.19 - Add a new ioctl to craete secondary kfd processes
  * - 1.20 - Trap handler support for expert scheduling mode available
  * - 1.21 - Debugger support to subscribe to LDS out-of-address exceptions
+ * - 1.22 - Add queue creation with metadata ring size
  */
 #define KFD_IOCTL_MAJOR_VERSION 1
-#define KFD_IOCTL_MINOR_VERSION 21
+#define KFD_IOCTL_MINOR_VERSION 22
 
 struct kfd_ioctl_get_version_args {
        __u32 major_version;    /* from KFD */
@@ -87,7 +88,7 @@ struct kfd_ioctl_create_queue_args {
        __u32 ctx_save_restore_size;    /* to KFD */
        __u32 ctl_stack_size;           /* to KFD */
        __u32 sdma_engine_id;           /* to KFD */
-       __u32 pad;
+       __u32 metadata_ring_size;       /* to KFD */
 };
 
 struct kfd_ioctl_destroy_queue_args {
-- 
2.52.0

[PATCH] drm/amdkfd: Add metadata ring buffer for compute

Reply via email to