---
drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 44 +++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h | 2 +
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 16 ++-----
.../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 23 ++++++----
4 files changed, 64 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index ec911dce345f..4d884180cf61 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -403,6 +403,50 @@ void amdgpu_gart_map_vram_range(struct amdgpu_device *adev, uint64_t pa,
drm_dev_exit(idx);
}
+/**
+ * amdgpu_gart_map_gfx9_mqd - map mqd and ctrl_stack dma_addresses into GART entries
+ *
+ * @adev: amdgpu_device pointer
+ * @offset: offset into the GPU's gart aperture
+ * @pages: number of CPU pages to bind (one @dma_addr entry per page)
+ * @dma_addr: DMA addresses of pages
+ * @flags: PTE flags for the MQD page; the ctrl_stack PTEs get MTYPE NC
+ *
+ * Map the MQD and control stack into GART entries on gfxv9. The MQD occupies
+ * the first 4KB GPU page and keeps @flags (UC expected from the caller); all
+ * later GPU pages hold the control stack and get MTYPE NC (non-coherent).
+ * Returns early if adev->gart.ptr is NULL or drm_dev_enter() fails.
+ */
+void amdgpu_gart_map_gfx9_mqd(struct amdgpu_device *adev, uint64_t offset,
+		int pages, dma_addr_t *dma_addr, uint64_t flags)
+{
+	uint64_t page_base;
+	unsigned int i, j, t;
+	int idx;
+	uint64_t ctrl_flags = AMDGPU_PTE_MTYPE_VG10(flags, AMDGPU_MTYPE_NC);
+	void *dst;
+
+	if (!adev->gart.ptr)
+		return;
+
+	if (!drm_dev_enter(adev_to_drm(adev), &idx))
+		return;
+
+	t = offset / AMDGPU_GPU_PAGE_SIZE;	/* index of the first PTE to write */
+	dst = adev->gart.ptr;
+	for (i = 0; i < pages; i++) {
+		page_base = dma_addr[i];
+		for (j = 0; j < AMDGPU_GPU_PAGES_IN_CPU_PAGE; j++, t++) {
+			if ((i == 0) && (j == 0))	/* very first GPU page: the MQD */
+				amdgpu_gmc_set_pte_pde(adev, dst, t, page_base, flags);
+			else				/* everything after it: ctrl_stack */
+				amdgpu_gmc_set_pte_pde(adev, dst, t, page_base, ctrl_flags);
+			page_base += AMDGPU_GPU_PAGE_SIZE;
+		}
+	}
+	drm_dev_exit(idx);
+}
+
/**
* amdgpu_gart_bind - bind pages into the gart page table
*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
index d3118275ddae..6ebd2da32ea6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
@@ -62,6 +62,8 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
void amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
int pages, dma_addr_t *dma_addr, uint64_t flags,
void *dst);
+void amdgpu_gart_map_gfx9_mqd(struct amdgpu_device *adev, uint64_t offset,
+ int pages, dma_addr_t *dma_addr, uint64_t flags);
void amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
int pages, dma_addr_t *dma_addr, uint64_t flags);
void amdgpu_gart_map_vram_range(struct amdgpu_device *adev, uint64_t pa,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 67983955a124..e086eb1d2b24 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -855,25 +855,15 @@ static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp);
uint64_t page_idx, pages_per_xcc;
int i;
- uint64_t ctrl_flags = AMDGPU_PTE_MTYPE_VG10(flags, AMDGPU_MTYPE_NC);
pages_per_xcc = total_pages;
do_div(pages_per_xcc, num_xcc);
for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) {
- /* MQD page: use default flags */
- amdgpu_gart_bind(adev,
+ amdgpu_gart_map_gfx9_mqd(adev,
gtt->offset + (page_idx << PAGE_SHIFT),
- 1, &gtt->ttm.dma_address[page_idx], flags);
- /*
- * Ctrl pages - modify the memory type to NC (ctrl_flags) from
- * the second page of the BO onward.
- */
- amdgpu_gart_bind(adev,
- gtt->offset + ((page_idx + 1) << PAGE_SHIFT),
- pages_per_xcc - 1,
- &gtt->ttm.dma_address[page_idx + 1],
- ctrl_flags);
+ pages_per_xcc, &gtt->ttm.dma_address[page_idx],
+ flags);
}
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index dcf4bbfa641b..ff0e483514da 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -42,9 +42,16 @@ static uint64_t mqd_stride_v9(struct mqd_manager *mm,
struct queue_properties *q)
{
if (mm->dev->kfd->cwsr_enabled &&
- q->type == KFD_QUEUE_TYPE_COMPUTE)
- return ALIGN(q->ctl_stack_size, PAGE_SIZE) +
- ALIGN(sizeof(struct v9_mqd), PAGE_SIZE);
+ q->type == KFD_QUEUE_TYPE_COMPUTE) {
+
+ /* On gfxv9, the MQD resides in the first 4K page,
+ * followed by the control stack. Align both to
+ * AMDGPU_GPU_PAGE_SIZE to maintain the required 4K boundary.
+ */
+
+ return ALIGN(ALIGN(q->ctl_stack_size, AMDGPU_GPU_PAGE_SIZE) +
+ ALIGN(sizeof(struct v9_mqd), AMDGPU_GPU_PAGE_SIZE), PAGE_SIZE);
+ }
return mm->mqd_size;
}
@@ -148,8 +155,8 @@ static struct kfd_mem_obj *allocate_mqd(struct mqd_manager *mm,
if (!mqd_mem_obj)
return NULL;
retval = amdgpu_amdkfd_alloc_kernel_mem(node->adev,
- (ALIGN(q->ctl_stack_size, PAGE_SIZE) +
- ALIGN(sizeof(struct v9_mqd), PAGE_SIZE)) *
+ (ALIGN(ALIGN(q->ctl_stack_size, AMDGPU_GPU_PAGE_SIZE) +
+ ALIGN(sizeof(struct v9_mqd), AMDGPU_GPU_PAGE_SIZE), PAGE_SIZE)) *
NUM_XCC(node->xcc_mask),
mqd_on_vram(node->adev) ? AMDGPU_GEM_DOMAIN_VRAM :
AMDGPU_GEM_DOMAIN_GTT,
@@ -357,7 +364,7 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
struct kfd_context_save_area_header header;
/* Control stack is located one page after MQD. */
- void *mqd_ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE);
+ void *mqd_ctl_stack = (void *)((uintptr_t)mqd + AMDGPU_GPU_PAGE_SIZE);
m = get_mqd(mqd);
@@ -394,7 +401,7 @@ static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, voi
{
struct v9_mqd *m;
/* Control stack is located one page after MQD. */
- void *ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE);
+ void *ctl_stack = (void *)((uintptr_t)mqd + AMDGPU_GPU_PAGE_SIZE);
m = get_mqd(mqd);
@@ -440,7 +447,7 @@ static void restore_mqd(struct mqd_manager *mm, void **mqd,
*gart_addr = addr;
/* Control stack is located one page after MQD. */
- ctl_stack = (void *)((uintptr_t)*mqd + PAGE_SIZE);
+ ctl_stack = (void *)((uintptr_t)*mqd + AMDGPU_GPU_PAGE_SIZE);
memcpy(ctl_stack, ctl_stack_src, ctl_stack_size);
m->cp_hqd_pq_doorbell_control =