From: Philip Yang <philip.y...@amd.com>

Use amdgpu_vm_bo_update_mapping to update the GPU page tables,
mapping or unmapping an SVM range's system memory page addresses
on the GPUs.
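
As a minimal sketch, the two call patterns this patch builds on look
as follows (names are taken from the code below; arguments are
simplified to scalars):

    /* map: write valid PTEs pointing at the range's system pages */
    amdgpu_vm_bo_update_mapping(adev, adev, vm, false, false, NULL,
                                start, last, pte_flags, 0, NULL,
                                pages_addr, &vm->last_update);

    /* unmap: overwrite the same range with zero (invalid) PTEs */
    amdgpu_vm_bo_update_mapping(adev, adev, vm, false, true, NULL,
                                start, last, 0, 0, NULL, NULL, &fence);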

Signed-off-by: Philip Yang <philip.y...@amd.com>
Signed-off-by: Alex Sierra <alex.sie...@amd.com>
Signed-off-by: Felix Kuehling <felix.kuehl...@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 232 ++++++++++++++++++++++++++-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |   2 +
 2 files changed, 233 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 55500ec4972f..3c4a036609c4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -534,6 +534,229 @@ svm_range_split_add_front(struct svm_range *prange, struct svm_range *new,
        return 0;
 }
 
+static uint64_t
+svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange)
+{
+       uint32_t flags = prange->flags;
+       uint32_t mapping_flags;
+       uint64_t pte_flags;
+
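+       /* SVM ranges are backed by system memory, so map them snooped */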
+       pte_flags = AMDGPU_PTE_VALID;
+       pte_flags |= AMDGPU_PTE_SYSTEM | AMDGPU_PTE_SNOOPED;
+
+       mapping_flags = AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE;
+
+       if (flags & KFD_IOCTL_SVM_FLAG_GPU_RO)
+               mapping_flags &= ~AMDGPU_VM_PAGE_WRITEABLE;
+       if (flags & KFD_IOCTL_SVM_FLAG_GPU_EXEC)
+               mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
+       if (flags & KFD_IOCTL_SVM_FLAG_COHERENT)
+               mapping_flags |= AMDGPU_VM_MTYPE_UC;
+       else
+               mapping_flags |= AMDGPU_VM_MTYPE_NC;
+
+       /* TODO: add CHIP_ARCTURUS new flags for vram mapping */
+
+       pte_flags |= amdgpu_gem_va_map_flags(adev, mapping_flags);
+
+       /* Apply ASIC specific mapping flags */
+       amdgpu_gmc_get_vm_pte(adev, &prange->mapping, &pte_flags);
+
+       pr_debug("PTE flags 0x%llx\n", pte_flags);
+
+       return pte_flags;
+}
+
+static int
+svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+                        struct svm_range *prange, struct dma_fence **fence)
+{
+       uint64_t init_pte_value = 0;
+       uint64_t start;
+       uint64_t last;
+
+       start = prange->it_node.start;
+       last = prange->it_node.last;
+
+       pr_debug("svms 0x%p [0x%llx 0x%llx]\n", prange->svms, start, last);
+
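+       /* Writing zero PTEs over [start, last] removes the GPU mapping */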
+       return amdgpu_vm_bo_update_mapping(adev, adev, vm, false, true, NULL,
+                                          start, last, init_pte_value, 0,
+                                          NULL, NULL, fence);
+}
+
+static int
+svm_range_unmap_from_gpus(struct svm_range *prange)
+{
+       DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
+       struct kfd_process_device *pdd;
+       struct dma_fence *fence = NULL;
+       struct amdgpu_device *adev;
+       struct kfd_process *p;
+       struct kfd_dev *dev;
+       uint32_t gpuidx;
+       int r = 0;
+
+       bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip,
+                 MAX_GPU_INSTANCE);
+       p = container_of(prange->svms, struct kfd_process, svms);
+
+       for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
+               pr_debug("unmap from gpu idx 0x%x\n", gpuidx);
+               r = kfd_process_device_from_gpuidx(p, gpuidx, &dev);
+               if (r) {
+                       pr_debug("failed to find device idx %d\n", gpuidx);
+                       return -EINVAL;
+               }
+
+               pdd = kfd_bind_process_to_device(dev, p);
+               if (IS_ERR(pdd))
+                       return -EINVAL;
+
+               adev = (struct amdgpu_device *)dev->kgd;
+
+               r = svm_range_unmap_from_gpu(adev, pdd->vm, prange, &fence);
+               if (r)
+                       break;
+
+               if (fence) {
+                       r = dma_fence_wait(fence, false);
+                       dma_fence_put(fence);
+                       fence = NULL;
+                       if (r)
+                               break;
+               }
+
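+               /* Flush stale translations now that the unmap completed */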
+               amdgpu_amdkfd_flush_gpu_tlb_pasid((struct kgd_dev *)adev,
+                                                 p->pasid);
+       }
+
+       return r;
+}
+
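+/* amdgpu_vm_validate_pt_bos callback: keep page table BOs in VRAM */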
+static int svm_range_bo_validate(void *param, struct amdgpu_bo *bo)
+{
+       struct ttm_operation_ctx ctx = { false, false };
+
+       amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
+
+       return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+}
+
+static int
+svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+                    struct svm_range *prange, bool reserve_vm,
+                    struct dma_fence **fence)
+{
+       struct amdgpu_bo *root;
+       dma_addr_t *pages_addr;
+       uint64_t pte_flags;
+       int r = 0;
+
+       pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms,
+                prange->it_node.start, prange->it_node.last);
+
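+       /* Take the root PD reservation unless the caller already holds it */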
+       if (reserve_vm) {
+               root = amdgpu_bo_ref(vm->root.base.bo);
+               r = amdgpu_bo_reserve(root, true);
+               if (r) {
+                       pr_debug("failed %d to reserve root bo\n", r);
+                       amdgpu_bo_unref(&root);
+                       goto out;
+               }
+               r = amdgpu_vm_validate_pt_bos(adev, vm, svm_range_bo_validate,
+                                             NULL);
+               if (r) {
+                       pr_debug("failed %d validate pt bos\n", r);
+                       goto unreserve_out;
+               }
+       }
+
+       prange->mapping.start = prange->it_node.start;
+       prange->mapping.last = prange->it_node.last;
+       prange->mapping.offset = 0;
+       pte_flags = svm_range_get_pte_flags(adev, prange);
+       prange->mapping.flags = pte_flags;
+       pages_addr = prange->pages_addr;
+
+       r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, false, NULL,
+                                       prange->mapping.start,
+                                       prange->mapping.last, pte_flags,
+                                       prange->mapping.offset, NULL,
+                                       pages_addr, &vm->last_update);
+       if (r) {
+               pr_debug("failed %d to map to gpu 0x%lx\n", r,
+                        prange->it_node.start);
+               goto unreserve_out;
+       }
+
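+       /* Update the page directories to point at the new page tables */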
+       r = amdgpu_vm_update_pdes(adev, vm, false);
+       if (r) {
+               pr_debug("failed %d to update directories 0x%lx\n", r,
+                        prange->it_node.start);
+               goto unreserve_out;
+       }
+
+       if (fence)
+               *fence = dma_fence_get(vm->last_update);
+
+unreserve_out:
+       if (reserve_vm) {
+               amdgpu_bo_unreserve(root);
+               amdgpu_bo_unref(&root);
+       }
+
+out:
+       return r;
+}
+
+static int svm_range_map_to_gpus(struct svm_range *prange, bool reserve_vm)
+{
+       DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
+       struct kfd_process_device *pdd;
+       struct amdgpu_device *adev;
+       struct kfd_process *p;
+       struct kfd_dev *dev;
+       struct dma_fence *fence = NULL;
+       uint32_t gpuidx;
+       int r = 0;
+
+       bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip,
+                 MAX_GPU_INSTANCE);
+       p = container_of(prange->svms, struct kfd_process, svms);
+
+       for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
+               r = kfd_process_device_from_gpuidx(p, gpuidx, &dev);
+               if (r) {
+                       pr_debug("failed to find device idx %d\n", gpuidx);
+                       return -EINVAL;
+               }
+
+               pdd = kfd_bind_process_to_device(dev, p);
+               if (IS_ERR(pdd))
+                       return -EINVAL;
+               adev = (struct amdgpu_device *)dev->kgd;
+
+               r = svm_range_map_to_gpu(adev, pdd->vm, prange, reserve_vm,
+                                        &fence);
+               if (r)
+                       break;
+
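+               /* Wait for this GPU's update before mapping on the next one */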
+               if (fence) {
+                       r = dma_fence_wait(fence, false);
+                       dma_fence_put(fence);
+                       fence = NULL;
+                       if (r) {
+                               pr_debug("failed %d to dma fence wait\n", r);
+                               break;
+                       }
+               }
+       }
+
+       return r;
+}
+
 struct svm_range *svm_range_clone(struct svm_range *old)
 {
        struct svm_range *new;
@@ -750,6 +973,7 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, unsigned long start,
         */
        list_for_each_entry_safe(prange, tmp, &update_list, update_list) {
                list_del(&prange->list);
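+               /* Remove GPU mappings before the range is freed */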
+               svm_range_unmap_from_gpus(prange);
                mutex_lock(&svms->free_list_lock);
                list_add_tail(&prange->remove_list, &svms->free_list);
                mutex_unlock(&svms->free_list_lock);
@@ -991,8 +1215,14 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size,
                }
 
                r = svm_range_validate(mm, prange);
-               if (r)
+               if (r) {
                        pr_debug("failed %d to validate svm range\n", r);
+                       goto out_unlock;
+               }
+
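+               /* Map the validated range on all GPUs that have access */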
+               r = svm_range_map_to_gpus(prange, true);
+               if (r)
+                       pr_debug("failed %d to map svm range\n", r);
 
 out_unlock:
                if (r) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index 4d394f72eefc..fb68b5ee54f8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -42,6 +42,7 @@
  * @update_list:link list node used to add to update_list
  * @remove_list:link list node used to add to remove list
  * @hmm_range:  hmm range structure used by hmm_range_fault to get system pages
+ * @mapping:    bo_va mapping structure to create and update GPU page table
  * @npages:     number of pages
  * @pages_addr: list of system memory physical page address
  * @flags:      flags defined as KFD_IOCTL_SVM_FLAG_*
@@ -63,6 +64,7 @@ struct svm_range {
        struct list_head                update_list;
        struct list_head                remove_list;
        struct hmm_range                *hmm_range;
+       struct amdgpu_bo_va_mapping     mapping;
        uint64_t                        npages;
        dma_addr_t                      *pages_addr;
        uint32_t                        flags;
-- 
2.29.2
