On 2025-09-08 12:15, James Zhu wrote:
to get migration pages. dst MIGRATE_PFN_VALID bit and src
MIGRATE_PFN_MIGRATE bit should always be set when migration success.

cpage includes src MIGRATE_PFN_MIGRATE bit set and MIGRATE_PFN_VALID
bit unset pages for both ram and vram when memory is only allocated
without being populated before migration, those ram pages should be
counted as migrate pages and those vram pages should not be counted
as migrate pages. Here migration pages refer to how many vram pages
invloved.

Signed-off-by: James Zhu <james....@amd.com>
---
  drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 30 +++++++++++++-----------
  1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index f0b690d4bb46..83b9d019c885 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -260,17 +260,18 @@ static void svm_migrate_put_sys_page(unsigned long addr)
        put_page(page);
  }
-static unsigned long svm_migrate_unsuccessful_pages(struct migrate_vma *migrate)
+static unsigned long svm_migrate_successful_pages(struct migrate_vma *migrate)
  {
-       unsigned long upages = 0;
+       unsigned long mpages = 0;
        unsigned long i;
for (i = 0; i < migrate->npages; i++) {
-               if (migrate->src[i] & MIGRATE_PFN_VALID &&
-                   !(migrate->src[i] & MIGRATE_PFN_MIGRATE))
-                       upages++;
+               if (migrate->dst[i] & MIGRATE_PFN_VALID &&
+                       migrate->src[i] & MIGRATE_PFN_MIGRATE)
+                               mpages++;
+               }
        }
-       return upages;
+       return mpages;
  }

To fix the incorrect page counting, maybe add this check in svm_migrate_unsuccessful_pages, I fell this is easier to understand than the condition added in svm_migrate_successful_pages

@@ -278,6 +278,15 @@ static unsigned long svm_migrate_unsuccessful_pages(struct migrate_vma *migrate)
                if (migrate->src[i] & MIGRATE_PFN_VALID &&
                    !(migrate->src[i] & MIGRATE_PFN_MIGRATE))
                        upages++;
+               /*
+                * if migrating from vram to ram, don't count device pages
+                * which are not populated, this could happen if svm_bo is
+                * evicted.
+                */
+               if (!(migrate.flags & MIGRATE_VMA_SELECT_SYSTEM) &&
+                   !(migrate->src[i] & MIGRATE_PFN_VALID) &&
+                   migrate->src[i] & MIGRATE_PFN_MIGRATE)
+                       upages++;
        }
        return upages;

  static int
@@ -447,8 +448,8 @@ svm_migrate_vma_to_vram(struct kfd_node *node, struct 
svm_range *prange,
        svm_migrate_copy_done(adev, mfence);
        migrate_vma_finalize(&migrate);
- mpages = cpages - svm_migrate_unsuccessful_pages(&migrate);
-       pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
+       mpages = svm_migrate_successful_pages(&migrate);
+       pr_debug("migrated/collected/requested 0x%lx/0x%lx/0x%lx\n",
                         mpages, cpages, migrate.npages);
svm_range_dma_unmap_dev(adev->dev, scratch, 0, npages);
@@ -688,7 +689,6 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct 
svm_range *prange,
  {
        struct kfd_process *p = container_of(prange->svms, struct kfd_process, 
svms);
        uint64_t npages = (end - start) >> PAGE_SHIFT;
-       unsigned long upages = npages;
        unsigned long cpages = 0;
        unsigned long mpages = 0;
        struct amdgpu_device *adev = node->adev;
@@ -748,9 +748,9 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct 
svm_range *prange,
                                    scratch, npages);
        migrate_vma_pages(&migrate);
- upages = svm_migrate_unsuccessful_pages(&migrate);
-       pr_debug("unsuccessful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
-                upages, cpages, migrate.npages);
+       mpages = svm_migrate_successful_pages(&migrate);
+       pr_debug("migrated/collected/requested 0x%lx/0x%lx/0x%lx\n",
+               mpages, cpages, migrate.npages);
svm_migrate_copy_done(adev, mfence);
        migrate_vma_finalize(&migrate);
@@ -763,8 +763,7 @@ svm_migrate_vma_to_ram(struct kfd_node *node, struct 
svm_range *prange,
                                    start >> PAGE_SHIFT, end >> PAGE_SHIFT,
                                    node->id, 0, trigger, r);
  out:
-       if (!r && cpages) {
-               mpages = cpages - upages;
+       if (!r && mpages) {
                pdd = svm_range_get_pdd_by_node(prange, node);
                if (pdd)
                        WRITE_ONCE(pdd->page_out, pdd->page_out + mpages);
@@ -847,6 +846,9 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, 
struct mm_struct *mm,
        }
if (r >= 0) {
+               WARN_ONCE(prange->vram_pages < mpages,
+                       "Recorded vram pages(0x%llx) should not be less than 
migration pages(0x%lx).",
+                       prange->vram_pages, mpages);

It is good to add warning once here, should we also change u64 prange->vram_pages to 0 for this case, otherwise this could leak svm_bo as prange->vram_pages never become 0?

Regards,

Philip

                prange->vram_pages -= mpages;
/* prange does not have vram page set its actual_loc to system

Reply via email to