MIGRATE_VMA_SELECT_COMPOUND will be used to select THP pages during
migrate_vma_setup(), and MIGRATE_PFN_COMPOUND will indicate that a
device page is being migrated as a compound page during device pfn
migration.
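
For example, a driver opting in to THP migration might set up its
migrate_vma arguments roughly as below. This is an illustrative sketch
only; src_pfns, dst_pfns and dev_pgmap_owner are placeholder
driver-side names:

	struct migrate_vma args = {
		.vma		= vma,
		.start		= addr,		/* HPAGE_PMD_SIZE aligned */
		.end		= addr + HPAGE_PMD_SIZE,
		.src		= src_pfns,	/* >= HPAGE_PMD_NR entries */
		.dst		= dst_pfns,
		.pgmap_owner	= dev_pgmap_owner,
		.flags		= MIGRATE_VMA_SELECT_SYSTEM |
				  MIGRATE_VMA_SELECT_COMPOUND,
	};

	if (migrate_vma_setup(&args))
		return -EFAULT;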

The migrate_device code paths go through the collect, setup and
finalize phases of migration.

The entries in the src and dst arrays passed to these functions still
remain at a PAGE_SIZE granularity. When a compound page is passed, the
first entry has the PFN along with MIGRATE_PFN_COMPOUND and the other
flags set (MIGRATE_PFN_MIGRATE, MIGRATE_PFN_VALID); the remaining
(HPAGE_PMD_NR - 1) entries are filled with zeroes. This representation
allows the compound page to be split into smaller page sizes later, if
needed.
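
Concretely, for a PMD-sized folio at pfn 'pfn' that was collected for
migration, the arrays would look roughly like this (illustrative;
migrate_pfn() already encodes MIGRATE_PFN_VALID):

	src[0] = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE |
		 MIGRATE_PFN_COMPOUND;	/* plus MIGRATE_PFN_WRITE if writable */
	for (i = 1; i < HPAGE_PMD_NR; i++)
		src[i] = 0;		/* tail entries, reused on split */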

migrate_vma_collect_hole() and migrate_vma_collect_pmd() are now THP
aware. Two new helper functions, migrate_vma_collect_huge_pmd() and
migrate_vma_insert_huge_pmd_page(), have been added.

migrate_vma_collect_huge_pmd() can collect THP pages, but if this
fails for some reason, there is fallback support to split the folio
and migrate it.

migrate_vma_insert_huge_pmd_page() closely follows the logic of
migrate_vma_insert_page().

Support for splitting pages as needed for migration will follow in
later patches in this series.

Cc: Andrew Morton <a...@linux-foundation.org>
Cc: David Hildenbrand <da...@redhat.com>
Cc: Zi Yan <z...@nvidia.com>
Cc: Joshua Hahn <joshua.hah...@gmail.com>
Cc: Rakie Kim <rakie....@sk.com>
Cc: Byungchul Park <byungc...@sk.com>
Cc: Gregory Price <gou...@gourry.net>
Cc: Ying Huang <ying.hu...@linux.alibaba.com>
Cc: Alistair Popple <apop...@nvidia.com>
Cc: Oscar Salvador <osalva...@suse.de>
Cc: Lorenzo Stoakes <lorenzo.stoa...@oracle.com>
Cc: Baolin Wang <baolin.w...@linux.alibaba.com>
Cc: "Liam R. Howlett" <liam.howl...@oracle.com>
Cc: Nico Pache <npa...@redhat.com>
Cc: Ryan Roberts <ryan.robe...@arm.com>
Cc: Dev Jain <dev.j...@arm.com>
Cc: Barry Song <bao...@kernel.org>
Cc: Lyude Paul <ly...@redhat.com>
Cc: Danilo Krummrich <d...@kernel.org>
Cc: David Airlie <airl...@gmail.com>
Cc: Simona Vetter <sim...@ffwll.ch>
Cc: Ralph Campbell <rcampb...@nvidia.com>
Cc: Mika Penttilä <mpent...@redhat.com>
Cc: Matthew Brost <matthew.br...@intel.com>
Cc: Francois Dugast <francois.dug...@intel.com>

Signed-off-by: Balbir Singh <balb...@nvidia.com>
---
 include/linux/migrate.h |   2 +
 mm/migrate_device.c     | 457 ++++++++++++++++++++++++++++++++++------
 2 files changed, 396 insertions(+), 63 deletions(-)

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index acadd41e0b5c..d9cef0819f91 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -129,6 +129,7 @@ static inline int migrate_misplaced_folio(struct folio *folio, int node)
 #define MIGRATE_PFN_VALID      (1UL << 0)
 #define MIGRATE_PFN_MIGRATE    (1UL << 1)
 #define MIGRATE_PFN_WRITE      (1UL << 3)
+#define MIGRATE_PFN_COMPOUND   (1UL << 4)
 #define MIGRATE_PFN_SHIFT      6
 
 static inline struct page *migrate_pfn_to_page(unsigned long mpfn)
@@ -147,6 +148,7 @@ enum migrate_vma_direction {
        MIGRATE_VMA_SELECT_SYSTEM = 1 << 0,
        MIGRATE_VMA_SELECT_DEVICE_PRIVATE = 1 << 1,
        MIGRATE_VMA_SELECT_DEVICE_COHERENT = 1 << 2,
+       MIGRATE_VMA_SELECT_COMPOUND = 1 << 3,
 };
 
 struct migrate_vma {
diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index 0ed337f94fcd..6621bba62710 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -14,6 +14,7 @@
 #include <linux/pagewalk.h>
 #include <linux/rmap.h>
 #include <linux/swapops.h>
+#include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include "internal.h"
 
@@ -44,6 +45,23 @@ static int migrate_vma_collect_hole(unsigned long start,
        if (!vma_is_anonymous(walk->vma))
                return migrate_vma_collect_skip(start, end, walk);
 
+       if (thp_migration_supported() &&
+               (migrate->flags & MIGRATE_VMA_SELECT_COMPOUND) &&
+               (IS_ALIGNED(start, HPAGE_PMD_SIZE) &&
+                IS_ALIGNED(end, HPAGE_PMD_SIZE))) {
+               migrate->src[migrate->npages] = MIGRATE_PFN_MIGRATE |
+                                               MIGRATE_PFN_COMPOUND;
+               migrate->dst[migrate->npages] = 0;
+               migrate->npages++;
+               migrate->cpages++;
+
+               /*
+                * Zero-fill the remaining entries (the skip path clears
+                * both src and dst), in case we need to split later
+                */
+               return migrate_vma_collect_skip(start + PAGE_SIZE, end, walk);
+       }
+
        for (addr = start; addr < end; addr += PAGE_SIZE) {
                migrate->src[migrate->npages] = MIGRATE_PFN_MIGRATE;
                migrate->dst[migrate->npages] = 0;
@@ -54,57 +72,151 @@ static int migrate_vma_collect_hole(unsigned long start,
        return 0;
 }
 
-static int migrate_vma_collect_pmd(pmd_t *pmdp,
-                                  unsigned long start,
-                                  unsigned long end,
-                                  struct mm_walk *walk)
+/**
+ * migrate_vma_collect_huge_pmd - collect a THP without splitting the
+ * folio for device private pages.
+ * @pmdp: pointer to pmd entry
+ * @start: start address of the range for migration
+ * @end: end address of the range for migration
+ * @walk: mm_walk callback structure
+ * @fault_folio: folio backing migrate->fault_page, if any (may be NULL)
+ *
+ * Collect the huge pmd entry at @pmdp for migration and set the
+ * MIGRATE_PFN_COMPOUND flag in the migrate src entry to indicate that
+ * migration will occur at HPAGE_PMD granularity.
+ */
+static int migrate_vma_collect_huge_pmd(pmd_t *pmdp, unsigned long start,
+                                       unsigned long end, struct mm_walk *walk,
+                                       struct folio *fault_folio)
 {
+       struct mm_struct *mm = walk->mm;
+       struct folio *folio;
        struct migrate_vma *migrate = walk->private;
-       struct folio *fault_folio = migrate->fault_page ?
-               page_folio(migrate->fault_page) : NULL;
-       struct vm_area_struct *vma = walk->vma;
-       struct mm_struct *mm = vma->vm_mm;
-       unsigned long addr = start, unmapped = 0;
        spinlock_t *ptl;
-       pte_t *ptep;
+       swp_entry_t entry;
+       int ret;
+       unsigned long write = 0;
 
-again:
-       if (pmd_none(*pmdp))
+       ptl = pmd_lock(mm, pmdp);
+       if (pmd_none(*pmdp)) {
+               spin_unlock(ptl);
                return migrate_vma_collect_hole(start, end, -1, walk);
+       }
 
        if (pmd_trans_huge(*pmdp)) {
-               struct folio *folio;
-
-               ptl = pmd_lock(mm, pmdp);
-               if (unlikely(!pmd_trans_huge(*pmdp))) {
+               if (!(migrate->flags & MIGRATE_VMA_SELECT_SYSTEM)) {
                        spin_unlock(ptl);
-                       goto again;
+                       return migrate_vma_collect_skip(start, end, walk);
                }
 
                folio = pmd_folio(*pmdp);
                if (is_huge_zero_folio(folio)) {
                        spin_unlock(ptl);
-                       split_huge_pmd(vma, pmdp, addr);
-               } else {
-                       int ret;
+                       return migrate_vma_collect_hole(start, end, -1, walk);
+               }
+               if (pmd_write(*pmdp))
+                       write = MIGRATE_PFN_WRITE;
+       } else if (!pmd_present(*pmdp)) {
+               entry = pmd_to_swp_entry(*pmdp);
+
+               if (is_migration_entry(entry)) {
+                       /*
+                        * migration_entry_wait_on_locked() drops the ptl
+                        * before returning.
+                        */
+                       migration_entry_wait_on_locked(entry, ptl);
+                       return -EAGAIN;
+               }

-                       folio_get(folio);
+               folio = pfn_swap_entry_folio(entry);
+               if (!is_device_private_entry(entry) ||
+                       !(migrate->flags & MIGRATE_VMA_SELECT_DEVICE_PRIVATE) ||
+                       (folio->pgmap->owner != migrate->pgmap_owner)) {
                        spin_unlock(ptl);
-                       /* FIXME: we don't expect THP for fault_folio */
-                       if (WARN_ON_ONCE(fault_folio == folio))
-                               return migrate_vma_collect_skip(start, end,
-                                                               walk);
-                       if (unlikely(!folio_trylock(folio)))
-                               return migrate_vma_collect_skip(start, end,
-                                                               walk);
-                       ret = split_folio(folio);
-                       if (fault_folio != folio)
-                               folio_unlock(folio);
-                       folio_put(folio);
-                       if (ret)
-                               return migrate_vma_collect_skip(start, end,
-                                                               walk);
+                       return migrate_vma_collect_skip(start, end, walk);
                }
+
+               if (is_writable_device_private_entry(entry))
+                       write = MIGRATE_PFN_WRITE;
+       } else {
+               spin_unlock(ptl);
+               return -EAGAIN;
+       }
+
+       folio_get(folio);
+       if (folio != fault_folio && unlikely(!folio_trylock(folio))) {
+               spin_unlock(ptl);
+               folio_put(folio);
+               return migrate_vma_collect_skip(start, end, walk);
+       }
+
+       if (thp_migration_supported() &&
+               (migrate->flags & MIGRATE_VMA_SELECT_COMPOUND) &&
+               (IS_ALIGNED(start, HPAGE_PMD_SIZE) &&
+                IS_ALIGNED(end, HPAGE_PMD_SIZE))) {
+
+               struct page_vma_mapped_walk pvmw = {
+                       .ptl = ptl,
+                       .address = start,
+                       .pmd = pmdp,
+                       .vma = walk->vma,
+               };
+
+               unsigned long pfn = page_to_pfn(folio_page(folio, 0));
+
+               migrate->src[migrate->npages] = migrate_pfn(pfn) | write
+                                               | MIGRATE_PFN_MIGRATE
+                                               | MIGRATE_PFN_COMPOUND;
+               migrate->dst[migrate->npages++] = 0;
+               migrate->cpages++;
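+               /*
+                * Replace the pmd mapping with a pmd migration entry; on
+                * failure, undo the src entry and fall back to a folio split.
+                */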
+               ret = set_pmd_migration_entry(&pvmw, folio_page(folio, 0));
+               if (ret) {
+                       migrate->npages--;
+                       migrate->cpages--;
+                       migrate->src[migrate->npages] = 0;
+                       migrate->dst[migrate->npages] = 0;
+                       goto fallback;
+               }
+               migrate_vma_collect_skip(start + PAGE_SIZE, end, walk);
+               spin_unlock(ptl);
+               return 0;
+       }
+
+fallback:
+       spin_unlock(ptl);
+       if (!folio_test_large(folio)) {
+               /* Drop the lock and reference taken during collection */
+               if (fault_folio != folio)
+                       folio_unlock(folio);
+               folio_put(folio);
+               goto done;
+       }
+       ret = split_folio(folio);
+       if (fault_folio != folio)
+               folio_unlock(folio);
+       folio_put(folio);
+       if (ret)
+               return migrate_vma_collect_skip(start, end, walk);
+       if (pmd_none(pmdp_get_lockless(pmdp)))
+               return migrate_vma_collect_hole(start, end, -1, walk);
+
+done:
+       return -ENOENT;
+}
+
+static int migrate_vma_collect_pmd(pmd_t *pmdp,
+                                  unsigned long start,
+                                  unsigned long end,
+                                  struct mm_walk *walk)
+{
+       struct migrate_vma *migrate = walk->private;
+       struct vm_area_struct *vma = walk->vma;
+       struct mm_struct *mm = vma->vm_mm;
+       unsigned long addr = start, unmapped = 0;
+       spinlock_t *ptl;
+       struct folio *fault_folio = migrate->fault_page ?
+               page_folio(migrate->fault_page) : NULL;
+       pte_t *ptep;
+
+again:
+       if (pmd_trans_huge(*pmdp) || !pmd_present(*pmdp)) {
+               int ret = migrate_vma_collect_huge_pmd(pmdp, start, end,
+                                                      walk, fault_folio);
+
+               if (ret == -EAGAIN)
+                       goto again;
+               if (ret == 0)
+                       return 0;
        }
 
        ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
@@ -222,8 +334,7 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
                        mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0;
                }
 
-               /* FIXME support THP */
-               if (!page || !page->mapping || PageTransCompound(page)) {
+               if (!page || !page->mapping) {
                        mpfn = 0;
                        goto next;
                }
@@ -394,14 +505,6 @@ static bool migrate_vma_check_page(struct page *page, struct page *fault_page)
         */
        int extra = 1 + (page == fault_page);
 
-       /*
-        * FIXME support THP (transparent huge page), it is bit more complex to
-        * check them than regular pages, because they can be mapped with a pmd
-        * or with a pte (split pte mapping).
-        */
-       if (folio_test_large(folio))
-               return false;
-
        /* Page from ZONE_DEVICE have one extra reference */
        if (folio_is_zone_device(folio))
                extra++;
@@ -432,17 +535,24 @@ static unsigned long migrate_device_unmap(unsigned long *src_pfns,
 
        lru_add_drain();
 
-       for (i = 0; i < npages; i++) {
+       for (i = 0; i < npages; ) {
                struct page *page = migrate_pfn_to_page(src_pfns[i]);
                struct folio *folio;
+               unsigned int nr = 1;
 
                if (!page) {
                        if (src_pfns[i] & MIGRATE_PFN_MIGRATE)
                                unmapped++;
-                       continue;
+                       goto next;
                }
 
                folio = page_folio(page);
+               nr = folio_nr_pages(folio);
+
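+               /* Flag large folios so later phases treat them as one unit */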
+               if (nr > 1)
+                       src_pfns[i] |= MIGRATE_PFN_COMPOUND;
+
                /* ZONE_DEVICE folios are not on LRU */
                if (!folio_is_zone_device(folio)) {
                        if (!folio_test_lru(folio) && allow_drain) {
@@ -454,7 +564,7 @@ static unsigned long migrate_device_unmap(unsigned long *src_pfns,
                        if (!folio_isolate_lru(folio)) {
                                src_pfns[i] &= ~MIGRATE_PFN_MIGRATE;
                                restore++;
-                               continue;
+                               goto next;
                        }
 
                        /* Drop the reference we took in collect */
@@ -473,10 +583,12 @@ static unsigned long migrate_device_unmap(unsigned long *src_pfns,
 
                        src_pfns[i] &= ~MIGRATE_PFN_MIGRATE;
                        restore++;
-                       continue;
+                       goto next;
                }
 
                unmapped++;
+next:
+               i += nr;
        }
 
        for (i = 0; i < npages && restore; i++) {
@@ -622,6 +734,147 @@ int migrate_vma_setup(struct migrate_vma *args)
 }
 EXPORT_SYMBOL(migrate_vma_setup);
 
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+/**
+ * migrate_vma_insert_huge_pmd_page - Insert a huge folio into
+ * @migrate->vma->vm_mm at @addr. The folio is already allocated as part
+ * of the migration process as a large page.
+ *
+ * @page needs to be initialized and set up after it is allocated. The
+ * code here closely follows __do_huge_pmd_anonymous_page(). This API
+ * does not support THP zero pages.
+ *
+ * @migrate: migrate_vma arguments
+ * @addr: address where the folio will be inserted
+ * @page: page to be inserted at @addr
+ * @src: src pfn which is being migrated
+ * @pmdp: pointer to the pmd
+ */
+static int migrate_vma_insert_huge_pmd_page(struct migrate_vma *migrate,
+                                        unsigned long addr,
+                                        struct page *page,
+                                        unsigned long *src,
+                                        pmd_t *pmdp)
+{
+       struct vm_area_struct *vma = migrate->vma;
+       gfp_t gfp = vma_thp_gfp_mask(vma);
+       struct folio *folio = page_folio(page);
+       int ret;
+       vm_fault_t csa_ret;
+       spinlock_t *ptl;
+       pgtable_t pgtable;
+       pmd_t entry;
+       bool flush = false;
+       unsigned long i;
+
+       VM_WARN_ON_FOLIO(!folio, folio);
+       VM_WARN_ON_ONCE(!pmd_none(*pmdp) && !is_huge_zero_pmd(*pmdp));
+
+       if (!thp_vma_suitable_order(vma, addr, HPAGE_PMD_ORDER))
+               return -EINVAL;
+
+       ret = anon_vma_prepare(vma);
+       if (ret)
+               return ret;
+
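+       /*
+        * The newly allocated page is only now initialized as a THP:
+        * set its order and mark the folio large-rmappable before use.
+        */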
+       folio_set_order(folio, HPAGE_PMD_ORDER);
+       folio_set_large_rmappable(folio);
+
+       if (mem_cgroup_charge(folio, migrate->vma->vm_mm, gfp)) {
+               count_vm_event(THP_FAULT_FALLBACK);
+               count_mthp_stat(HPAGE_PMD_ORDER,
+                               MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE);
+               ret = -ENOMEM;
+               goto abort;
+       }
+
+       __folio_mark_uptodate(folio);
+
+       pgtable = pte_alloc_one(vma->vm_mm);
+       if (unlikely(!pgtable))
+               goto abort;
+
+       if (folio_is_device_private(folio)) {
+               swp_entry_t swp_entry;
+
+               if (vma->vm_flags & VM_WRITE)
+                       swp_entry = make_writable_device_private_entry(
+                                               page_to_pfn(page));
+               else
+                       swp_entry = make_readable_device_private_entry(
+                                               page_to_pfn(page));
+               entry = swp_entry_to_pmd(swp_entry);
+       } else {
+               if (folio_is_zone_device(folio) &&
+                   !folio_is_device_coherent(folio)) {
+                       goto abort;
+               }
+               entry = folio_mk_pmd(folio, vma->vm_page_prot);
+               if (vma->vm_flags & VM_WRITE)
+                       entry = pmd_mkwrite(pmd_mkdirty(entry), vma);
+       }
+
+       ptl = pmd_lock(vma->vm_mm, pmdp);
+       csa_ret = check_stable_address_space(vma->vm_mm);
+       if (csa_ret)
+               goto unlock_abort;
+
+       /*
+        * Check for userfaultfd but do not deliver the fault. Instead,
+        * just back off.
+        */
+       if (userfaultfd_missing(vma))
+               goto unlock_abort;
+
+       if (!pmd_none(*pmdp)) {
+               if (!is_huge_zero_pmd(*pmdp))
+                       goto unlock_abort;
+               flush = true;
+       }
+
+       add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
+       folio_add_new_anon_rmap(folio, vma, addr, RMAP_EXCLUSIVE);
+       if (!folio_is_zone_device(folio))
+               folio_add_lru_vma(folio, vma);
+       folio_get(folio);
+
+       if (flush) {
+               pte_free(vma->vm_mm, pgtable);
+               flush_cache_range(vma, addr, addr + HPAGE_PMD_SIZE);
+               pmdp_invalidate(vma, addr, pmdp);
+       } else {
+               pgtable_trans_huge_deposit(vma->vm_mm, pmdp, pgtable);
+               mm_inc_nr_ptes(vma->vm_mm);
+       }
+       set_pmd_at(vma->vm_mm, addr, pmdp, entry);
+       update_mmu_cache_pmd(vma, addr, pmdp);
+
+       spin_unlock(ptl);
+
+       count_vm_event(THP_FAULT_ALLOC);
+       count_mthp_stat(HPAGE_PMD_ORDER, MTHP_STAT_ANON_FAULT_ALLOC);
+       count_memcg_event_mm(vma->vm_mm, THP_FAULT_ALLOC);
+
+       return 0;
+
+unlock_abort:
+       spin_unlock(ptl);
+abort:
+       for (i = 0; i < HPAGE_PMD_NR; i++)
+               src[i] &= ~MIGRATE_PFN_MIGRATE;
+       return 0;
+}
+#else /* !CONFIG_ARCH_ENABLE_THP_MIGRATION */
+static int migrate_vma_insert_huge_pmd_page(struct migrate_vma *migrate,
+                                        unsigned long addr,
+                                        struct page *page,
+                                        unsigned long *src,
+                                        pmd_t *pmdp)
+{
+       return -EINVAL;
+}
+#endif
+
 /*
  * This code closely matches the code in:
  *   __handle_mm_fault()
@@ -632,9 +885,10 @@ EXPORT_SYMBOL(migrate_vma_setup);
  */
 static void migrate_vma_insert_page(struct migrate_vma *migrate,
                                    unsigned long addr,
-                                   struct page *page,
+                                   unsigned long *dst,
                                    unsigned long *src)
 {
+       struct page *page = migrate_pfn_to_page(*dst);
        struct folio *folio = page_folio(page);
        struct vm_area_struct *vma = migrate->vma;
        struct mm_struct *mm = vma->vm_mm;
@@ -662,8 +916,25 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
        pmdp = pmd_alloc(mm, pudp, addr);
        if (!pmdp)
                goto abort;
-       if (pmd_trans_huge(*pmdp))
-               goto abort;
+
+       if (thp_migration_supported() && (*dst & MIGRATE_PFN_COMPOUND)) {
+               int ret = migrate_vma_insert_huge_pmd_page(migrate, addr, page,
+                                                               src, pmdp);
+               if (ret)
+                       goto abort;
+               return;
+       }
+
+       if (!pmd_none(*pmdp)) {
+               if (pmd_trans_huge(*pmdp)) {
+                       if (!is_huge_zero_pmd(*pmdp))
+                               goto abort;
+                       folio_get(pmd_folio(*pmdp));
+                       split_huge_pmd(vma, pmdp, addr);
+               } else if (pmd_leaf(*pmdp))
+                       goto abort;
+       }
+
        if (pte_alloc(mm, pmdp))
                goto abort;
        if (unlikely(anon_vma_prepare(vma)))
@@ -754,23 +1025,24 @@ static void __migrate_device_pages(unsigned long *src_pfns,
        unsigned long i;
        bool notified = false;
 
-       for (i = 0; i < npages; i++) {
+       for (i = 0; i < npages; ) {
                struct page *newpage = migrate_pfn_to_page(dst_pfns[i]);
                struct page *page = migrate_pfn_to_page(src_pfns[i]);
                struct address_space *mapping;
                struct folio *newfolio, *folio;
                int r, extra_cnt = 0;
+               unsigned long nr = 1;
 
                if (!newpage) {
                        src_pfns[i] &= ~MIGRATE_PFN_MIGRATE;
-                       continue;
+                       goto next;
                }
 
                if (!page) {
                        unsigned long addr;
 
                        if (!(src_pfns[i] & MIGRATE_PFN_MIGRATE))
-                               continue;
+                               goto next;
 
                        /*
                         * The only time there is no vma is when called from
@@ -788,15 +1060,47 @@ static void __migrate_device_pages(unsigned long *src_pfns,
                                        migrate->pgmap_owner);
                                mmu_notifier_invalidate_range_start(&range);
                        }
-                       migrate_vma_insert_page(migrate, addr, newpage,
+
+                       if ((src_pfns[i] & MIGRATE_PFN_COMPOUND) &&
+                               (!(dst_pfns[i] & MIGRATE_PFN_COMPOUND))) {
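+                               /*
+                                * A compound src entry has no compound dst
+                                * page; fail migration of this PMD range
+                                * (splitting support follows later in this
+                                * series).
+                                */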
+                               nr = HPAGE_PMD_NR;
+                               src_pfns[i] &= ~MIGRATE_PFN_COMPOUND;
+                               src_pfns[i] &= ~MIGRATE_PFN_MIGRATE;
+                               goto next;
+                       }
+
+                       migrate_vma_insert_page(migrate, addr, &dst_pfns[i],
                                                &src_pfns[i]);
-                       continue;
+                       goto next;
                }
 
                newfolio = page_folio(newpage);
                folio = page_folio(page);
                mapping = folio_mapping(folio);
 
+               /*
+                * If THP migration is enabled, check if both src and dst
+                * can migrate large pages
+                */
+               if (thp_migration_supported()) {
+                       if ((src_pfns[i] & MIGRATE_PFN_MIGRATE) &&
+                               (src_pfns[i] & MIGRATE_PFN_COMPOUND) &&
+                               !(dst_pfns[i] & MIGRATE_PFN_COMPOUND)) {
+
+                               if (!migrate) {
+                                       src_pfns[i] &= ~(MIGRATE_PFN_MIGRATE |
+                                                        MIGRATE_PFN_COMPOUND);
+                                       goto next;
+                               }
+                               src_pfns[i] &= ~MIGRATE_PFN_MIGRATE;
+                       } else if ((src_pfns[i] & MIGRATE_PFN_MIGRATE) &&
+                               (dst_pfns[i] & MIGRATE_PFN_COMPOUND) &&
+                               !(src_pfns[i] & MIGRATE_PFN_COMPOUND)) {
+                               src_pfns[i] &= ~MIGRATE_PFN_MIGRATE;
+                       }
+               }
+
                if (folio_is_device_private(newfolio) ||
                    folio_is_device_coherent(newfolio)) {
                        if (mapping) {
@@ -809,7 +1113,7 @@ static void __migrate_device_pages(unsigned long *src_pfns,
                                if (!folio_test_anon(folio) ||
                                    !folio_free_swap(folio)) {
                                        src_pfns[i] &= ~MIGRATE_PFN_MIGRATE;
-                                       continue;
+                                       goto next;
                                }
                        }
                } else if (folio_is_zone_device(newfolio)) {
@@ -817,7 +1121,7 @@ static void __migrate_device_pages(unsigned long *src_pfns,
                         * Other types of ZONE_DEVICE page are not supported.
                         */
                        src_pfns[i] &= ~MIGRATE_PFN_MIGRATE;
-                       continue;
+                       goto next;
                }
 
                BUG_ON(folio_test_writeback(folio));
@@ -829,6 +1133,8 @@ static void __migrate_device_pages(unsigned long *src_pfns,
                        src_pfns[i] &= ~MIGRATE_PFN_MIGRATE;
                else
                        folio_migrate_flags(newfolio, folio);
+next:
+               i += nr;
        }
 
        if (notified)
@@ -990,10 +1296,23 @@ static unsigned long migrate_device_pfn_lock(unsigned long pfn)
 int migrate_device_range(unsigned long *src_pfns, unsigned long start,
                        unsigned long npages)
 {
-       unsigned long i, pfn;
+       unsigned long i, j, pfn;
+
+       for (pfn = start, i = 0; i < npages; pfn++, i++) {
+               struct page *page = pfn_to_page(pfn);
+               struct folio *folio = page_folio(page);
+               unsigned int nr = 1;
 
-       for (pfn = start, i = 0; i < npages; pfn++, i++)
                src_pfns[i] = migrate_device_pfn_lock(pfn);
+               nr = folio_nr_pages(folio);
+               if (nr > 1) {
+                       src_pfns[i] |= MIGRATE_PFN_COMPOUND;
+                       for (j = 1; j < nr; j++)
+                               src_pfns[i + j] = 0;
+                       i += nr - 1;
+                       pfn += nr - 1;
+               }
+       }
 
        migrate_device_unmap(src_pfns, npages, NULL);
 
@@ -1011,10 +1330,22 @@ EXPORT_SYMBOL(migrate_device_range);
  */
 int migrate_device_pfns(unsigned long *src_pfns, unsigned long npages)
 {
-       unsigned long i;
+       unsigned long i, j;
+
+       for (i = 0; i < npages; i++) {
+               struct page *page = pfn_to_page(src_pfns[i]);
+               struct folio *folio = page_folio(page);
+               unsigned int nr = 1;
 
-       for (i = 0; i < npages; i++)
                src_pfns[i] = migrate_device_pfn_lock(src_pfns[i]);
+               nr = folio_nr_pages(folio);
+               if (nr > 1) {
+                       src_pfns[i] |= MIGRATE_PFN_COMPOUND;
+                       for (j = 1; j < nr; j++)
+                               src_pfns[i + j] = 0;
+                       i += nr - 1;
+               }
+       }
 
        migrate_device_unmap(src_pfns, npages, NULL);
 
-- 
2.50.1
