On 2023-02-27 18:07, Xiaogang.Chen wrote:
From: Xiaogang Chen <xiaogang.c...@amd.com>

svm_migrate_ram_to_vram migrates a prange from system RAM to VRAM. The prange
may cross multiple VMAs, so we need to remember the current destination VRAM
offset (in pages) across each per-VMA migration.

Good catch. It's not the offset in the page, but the offset in the TTM resource, I think. See more nit-picks inline.


Signed-off-by: Xiaogang Chen <xiaogang.c...@amd.com>
---
  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 17 ++++++++++-------
  1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 1c625433ff30..60664e0cbc1c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -294,7 +294,7 @@ static unsigned long svm_migrate_unsuccessful_pages(struct 
migrate_vma *migrate)
  static int
  svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
                         struct migrate_vma *migrate, struct dma_fence **mfence,
-                        dma_addr_t *scratch)
+                        dma_addr_t *scratch, uint64_t *cur_dst)

The name cur_dst is a bit confusing. There is a local variable "dst" in this function, which has a very different meaning. It's the pointer to an array of VRAM physical addresses. A better name for cur_dst would be ttm_res_offset, as it is the offset from the start of the TTM resource.

I'd prefer it if it were not a pointer. The calculations to increment the offset should be done at the top level, where it iterates through the VMAs. This low-level code doesn't need to make any assumptions about how that iteration works.


  {
        uint64_t npages = migrate->npages;
        struct device *dev = adev->dev;
@@ -304,8 +304,8 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct 
svm_range *prange,
        uint64_t i, j;
        int r;
- pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
-                prange->last);
+       pr_debug("svms 0x%p [0x%lx 0x%lx 0x%lx]\n", prange->svms, prange->start,
+                prange->last, *cur_dst);
src = scratch;
        dst = (uint64_t *)(scratch + npages);
@@ -316,7 +316,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct 
svm_range *prange,
                goto out;
        }
- amdgpu_res_first(prange->ttm_res, prange->offset << PAGE_SHIFT,
+       amdgpu_res_first(prange->ttm_res, *cur_dst << PAGE_SHIFT,

Maybe do the PAGE_SHIFT in the caller, where ttm_res_offset is calculated and updated.


                         npages << PAGE_SHIFT, &cursor);
        for (i = j = 0; i < npages; i++) {
                struct page *spage;
@@ -381,6 +381,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct 
svm_range *prange,
                        migrate->dst[i] = 0;
                }
        }
+       *cur_dst = *cur_dst + i;
#ifdef DEBUG_FORCE_MIXED_DOMAINS
        for (i = 0, j = 0; i < npages; i += 4, j++) {
@@ -403,7 +404,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct 
svm_range *prange,
  static long
  svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
                        struct vm_area_struct *vma, uint64_t start,
-                       uint64_t end, uint32_t trigger)
+                       uint64_t end, uint32_t trigger, uint64_t *cur_dst)

Same as above.


  {
        struct kfd_process *p = container_of(prange->svms, struct kfd_process, 
svms);
        uint64_t npages = (end - start) >> PAGE_SHIFT;
@@ -456,7 +457,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct 
svm_range *prange,
        else
                pr_debug("0x%lx pages migrated\n", cpages);
- r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, scratch);
+       r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, scratch, 
cur_dst);
        migrate_vma_pages(&migrate);
pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
@@ -504,6 +505,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t 
best_loc,
        unsigned long addr, start, end;
        struct vm_area_struct *vma;
        struct amdgpu_device *adev;
+       uint64_t cur_dst;
        unsigned long cpages = 0;
        long r = 0;
@@ -524,6 +526,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, start = prange->start << PAGE_SHIFT;
        end = (prange->last + 1) << PAGE_SHIFT;
+       cur_dst = prange->offset;

You could do the PAGE_SHIFT here.


for (addr = start; addr < end;) {
                unsigned long next;
@@ -533,7 +536,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t 
best_loc,
                        break;
next = min(vma->vm_end, end);
-               r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, 
trigger);
+               r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, trigger, 
&cur_dst);
                if (r < 0) {
                        pr_debug("failed %ld to migrate\n", r);
                        break;

Do the increment somewhere here, before next is assigned to addr.

    ttm_res_offset += next - addr;

Regards,
  Felix


Reply via email to