From: David Yat Sin <[email protected]> Add support for a separate ring-buffer for metadata packets when using compute queues. The userspace application allocates the metadata ring-buffer and the queue ring-buffer with a single allocation. The metadata ring-buffer starts after the queue ring-buffer.
Signed-off-by: David Yat Sin <[email protected]> Reviewed-by: Philip Yang <[email protected]> Signed-off-by: Alex Deucher <[email protected]> --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 8 +++++++ .../drm/amd/amdkfd/kfd_mqd_manager_v12_1.c | 21 +++++++++++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 3 ++- drivers/gpu/drm/amd/amdkfd/kfd_queue.c | 7 +++++-- include/uapi/linux/kfd_ioctl.h | 5 +++-- 5 files changed, 39 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 041237861107f..88621cb7d4098 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -221,6 +221,11 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, pr_debug("Size lower. clamped to KFD_MIN_QUEUE_RING_SIZE"); } + if ((args->metadata_ring_size != 0) && !is_power_of_2(args->metadata_ring_size)) { + pr_err("Metadata ring size must be a power of 2 or 0\n"); + return -EINVAL; + } + if (!access_ok((const void __user *) args->read_pointer_address, sizeof(uint32_t))) { pr_err("Can't access read pointer\n"); @@ -255,6 +260,9 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, q_properties->priority = args->queue_priority; q_properties->queue_address = args->ring_base_address; q_properties->queue_size = args->ring_size; + if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL) + q_properties->metadata_queue_size = args->metadata_ring_size; + q_properties->read_ptr = (void __user *)args->read_pointer_address; q_properties->write_ptr = (void __user *)args->write_pointer_address; q_properties->eop_ring_buffer_address = args->eop_buffer_address; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12_1.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12_1.c index f1c2c9e8cf6bb..a06b4e89af8a2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12_1.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12_1.c @@ 
-266,6 +266,27 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8); + if (q->metadata_queue_size) { + /* On GC 12.1 is 64 DWs which is 4 times size of AQL packet */ + if (q->metadata_queue_size == q->queue_size * 4) { + /* + * User application allocates main queue ring and metadata queue ring + * with a single allocation. metadata queue ring starts after main + * queue ring. + */ + m->cp_hqd_kd_base = + lower_32_bits((q->queue_address + q->queue_size) >> 8); + m->cp_hqd_kd_base_hi = + upper_32_bits((q->queue_address + q->queue_size) >> 8); + + m->cp_hqd_kd_cntl |= CP_HQD_KD_CNTL__KD_FETCHER_ENABLE_MASK; + /* KD_SIZE = 2 for metadata packet = 64 DWs */ + m->cp_hqd_kd_cntl |= 2 << CP_HQD_KD_CNTL__KD_SIZE__SHIFT; + } else { + pr_warn("Invalid metadata ring size, metadata queue will be ignored\n"); + } + } + m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index ebc637c38c04a..d798baa7e52e4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -506,7 +506,8 @@ struct queue_properties { enum kfd_queue_format format; unsigned int queue_id; uint64_t queue_address; - uint64_t queue_size; + uint64_t queue_size; + uint64_t metadata_queue_size; uint32_t priority; uint32_t queue_percent; void __user *read_ptr; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index 56c97189e7f12..1b465fdb2c645 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -247,9 +247,12 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope 
properties->format == KFD_QUEUE_FORMAT_AQL && topo_dev->node_props.gfx_target_version >= 70000 && topo_dev->node_props.gfx_target_version < 90000) - expected_queue_size = properties->queue_size / 2; + /* metadata_queue_size not supported on GFX7/GFX8 */ + expected_queue_size = + properties->queue_size / 2; else - expected_queue_size = properties->queue_size; + expected_queue_size = + properties->queue_size + properties->metadata_queue_size; vm = drm_priv_to_vm(pdd->drm_priv); err = amdgpu_bo_reserve(vm->root.bo, false); diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 6e91875c10baa..047bcb1cc0789 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -47,9 +47,10 @@ * - 1.19 - Add a new ioctl to craete secondary kfd processes * - 1.20 - Trap handler support for expert scheduling mode available * - 1.21 - Debugger support to subscribe to LDS out-of-address exceptions + * - 1.22 - Add queue creation with metadata ring base address */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 21 +#define KFD_IOCTL_MINOR_VERSION 22 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ @@ -87,7 +88,7 @@ struct kfd_ioctl_create_queue_args { __u32 ctx_save_restore_size; /* to KFD */ __u32 ctl_stack_size; /* to KFD */ __u32 sdma_engine_id; /* to KFD */ - __u32 pad; + __u32 metadata_ring_size; /* to KFD */ }; struct kfd_ioctl_destroy_queue_args { -- 2.52.0
