Hi,

On 9/3/25 04:18, Balbir Singh wrote:

> Extend migrate_vma_collect_pmd() to handle partially mapped large
> folios that require splitting before migration can proceed.
>
> During PTE walk in the collection phase, if a large folio is only
> partially mapped in the migration range, it must be split to ensure
> the folio is correctly migrated.
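
For reference, the partially mapped case the changelog describes is easy to
reproduce from userspace, e.g. by unmapping half of a THP-backed range. A
minimal sketch (the 2 MiB PMD size and the MADV_HUGEPAGE hint are assumptions
about the running kernel and architecture):

#include <string.h>
#include <sys/mman.h>

int main(void)
{
        size_t len = 2UL << 20; /* one PMD-sized (2 MiB) region */
        char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        if (p == MAP_FAILED)
                return 1;
        madvise(p, len, MADV_HUGEPAGE); /* hint: back this range with a THP */
        memset(p, 1, len);              /* fault the folio in */

        /*
         * Unmap the second half. The folio stays large (its split is
         * deferred), but it is now only partially mapped, which is the
         * case the collection phase has to handle.
         */
        munmap(p + len / 2, len / 2);
        return 0;
}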
>
> Cc: Andrew Morton <a...@linux-foundation.org>
> Cc: David Hildenbrand <da...@redhat.com>
> Cc: Zi Yan <z...@nvidia.com>
> Cc: Joshua Hahn <joshua.hah...@gmail.com>
> Cc: Rakie Kim <rakie....@sk.com>
> Cc: Byungchul Park <byungc...@sk.com>
> Cc: Gregory Price <gou...@gourry.net>
> Cc: Ying Huang <ying.hu...@linux.alibaba.com>
> Cc: Alistair Popple <apop...@nvidia.com>
> Cc: Oscar Salvador <osalva...@suse.de>
> Cc: Lorenzo Stoakes <lorenzo.stoa...@oracle.com>
> Cc: Baolin Wang <baolin.w...@linux.alibaba.com>
> Cc: "Liam R. Howlett" <liam.howl...@oracle.com>
> Cc: Nico Pache <npa...@redhat.com>
> Cc: Ryan Roberts <ryan.robe...@arm.com>
> Cc: Dev Jain <dev.j...@arm.com>
> Cc: Barry Song <bao...@kernel.org>
> Cc: Lyude Paul <ly...@redhat.com>
> Cc: Danilo Krummrich <d...@kernel.org>
> Cc: David Airlie <airl...@gmail.com>
> Cc: Simona Vetter <sim...@ffwll.ch>
> Cc: Ralph Campbell <rcampb...@nvidia.com>
> Cc: Mika Penttilä <mpent...@redhat.com>
> Cc: Matthew Brost <matthew.br...@intel.com>
> Cc: Francois Dugast <francois.dug...@intel.com>
>
> Signed-off-by: Balbir Singh <balb...@nvidia.com>
> ---
>  mm/migrate_device.c | 95 +++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 95 insertions(+)
>
> diff --git a/mm/migrate_device.c b/mm/migrate_device.c
> index e05e14d6eacd..e58c3f9d01c8 100644
> --- a/mm/migrate_device.c
> +++ b/mm/migrate_device.c
> @@ -54,6 +54,54 @@ static int migrate_vma_collect_hole(unsigned long start,
>       return 0;
>  }
>  
> +/**
> + * migrate_vma_split_folio - Helper function to split a large (THP/mTHP) folio
> + *
> + * @folio: the folio to split
> + * @fault_page: struct page associated with the fault, if any
> + *
> + * Return: 0 on success, negative error code on failure
> + */
> +static int migrate_vma_split_folio(struct folio *folio,
> +                                struct page *fault_page)
> +{
> +     int ret;
> +     struct folio *fault_folio = fault_page ? page_folio(fault_page) : NULL;
> +     struct folio *new_fault_folio = NULL;
> +
> +     if (folio != fault_folio) {
> +             folio_get(folio);
> +             folio_lock(folio);
> +     }
> +
> +     ret = split_folio(folio);
> +     if (ret) {
> +             if (folio != fault_folio) {
> +                     folio_unlock(folio);
> +                     folio_put(folio);
> +             }
> +             return ret;
> +     }
> +
> +     new_fault_folio = fault_page ? page_folio(fault_page) : NULL;
> +
> +     /*
> +      * Ensure the lock is held on the correct
> +      * folio after the split
> +      */
> +     if (!new_fault_folio) {
> +             folio_unlock(folio);
> +             folio_put(folio);
> +     } else if (folio != new_fault_folio) {
> +             folio_get(new_fault_folio);
> +             folio_lock(new_fault_folio);
> +             folio_unlock(folio);
> +             folio_put(folio);
> +     }
> +
> +     return 0;
> +}
> +
>  static int migrate_vma_collect_pmd(pmd_t *pmdp,
>                                  unsigned long start,
>                                  unsigned long end,
> @@ -136,6 +184,8 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
>                        * page table entry. Other special swap entries are not
>                        * migratable, and we ignore regular swapped page.
>                        */
> +                     struct folio *folio;
> +
>                       entry = pte_to_swp_entry(pte);
>                       if (!is_device_private_entry(entry))
>                               goto next;
> @@ -147,6 +197,29 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
>                           pgmap->owner != migrate->pgmap_owner)
>                               goto next;
>  
> +                     folio = page_folio(page);
> +                     if (folio_test_large(folio)) {
> +                             int ret;
> +
> +                             /*
> +                              * The PTE maps a large folio because it
> +                              * was partially unmapped. Split the
> +                              * folio now so the migration is handled
> +                              * correctly.
> +                              */
> +                             pte_unmap_unlock(ptep, ptl);
> +                             ret = migrate_vma_split_folio(folio,
> +                                                       migrate->fault_page);
> +
> +                             if (ret) {
> +                                     ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
> +                                     goto next;
> +                             }
> +
> +                             addr = start;
> +                             goto again;
> +                     }
> +
>                       mpfn = migrate_pfn(page_to_pfn(page)) |
>                                       MIGRATE_PFN_MIGRATE;
>                       if (is_writable_device_private_entry(entry))
> @@ -171,6 +244,28 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
>                                       pgmap->owner != migrate->pgmap_owner)
>                                       goto next;
>                       }
> +                     folio = page_folio(page);
> +                     if (folio_test_large(folio)) {
> +                             int ret;
> +
> +                             /*
> +                              * The PTE maps a large folio because it
> +                              * was partially unmapped. Split the
> +                              * folio now so the migration is handled
> +                              * correctly.
> +                              */

There are other reasons a PTE can map a large folio here, e.g. VMA splits
(mprotect(), mremap() and friends), so the comment shouldn't claim partial
unmap is the only cause.
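
For example, a plain mprotect() over half of a THP-backed range splits the
VMA and PTE-maps the folio without unmapping anything. A minimal userspace
sketch of that case (again, the 2 MiB PMD size and the MADV_HUGEPAGE hint
are assumptions about the running kernel):

#include <string.h>
#include <sys/mman.h>

int main(void)
{
        size_t len = 2UL << 20; /* one PMD-sized (2 MiB) region */
        char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        if (p == MAP_FAILED)
                return 1;
        madvise(p, len, MADV_HUGEPAGE); /* hint: back this range with a THP */
        memset(p, 1, len);              /* fault the folio in */

        /*
         * Change protection on the first half only: the VMA is split in
         * two and the huge PMD is remapped as PTEs, yet the folio is
         * still fully mapped. No partial unmap is involved.
         */
        mprotect(p, len / 2, PROT_READ);
        return 0;
}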

> +                             pte_unmap_unlock(ptep, ptl);
> +                             ret = migrate_vma_split_folio(folio,
> +                                                       migrate->fault_page);
> +
> +                             if (ret) {
> +                                     ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
> +                                     goto next;
> +                             }
> +
> +                             addr = start;
> +                             goto again;
> +                     }
>                       mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE;
>                       mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0;
>               }
