Hi,

On 9/3/25 04:18, Balbir Singh wrote:
> Extend migrate_vma_collect_pmd() to handle partially mapped large
> folios that require splitting before migration can proceed.
>
> During PTE walk in the collection phase, if a large folio is only
> partially mapped in the migration range, it must be split to ensure
> the folio is correctly migrated.
>
> Cc: Andrew Morton <a...@linux-foundation.org>
> Cc: David Hildenbrand <da...@redhat.com>
> Cc: Zi Yan <z...@nvidia.com>
> Cc: Joshua Hahn <joshua.hah...@gmail.com>
> Cc: Rakie Kim <rakie....@sk.com>
> Cc: Byungchul Park <byungc...@sk.com>
> Cc: Gregory Price <gou...@gourry.net>
> Cc: Ying Huang <ying.hu...@linux.alibaba.com>
> Cc: Alistair Popple <apop...@nvidia.com>
> Cc: Oscar Salvador <osalva...@suse.de>
> Cc: Lorenzo Stoakes <lorenzo.stoa...@oracle.com>
> Cc: Baolin Wang <baolin.w...@linux.alibaba.com>
> Cc: "Liam R. Howlett" <liam.howl...@oracle.com>
> Cc: Nico Pache <npa...@redhat.com>
> Cc: Ryan Roberts <ryan.robe...@arm.com>
> Cc: Dev Jain <dev.j...@arm.com>
> Cc: Barry Song <bao...@kernel.org>
> Cc: Lyude Paul <ly...@redhat.com>
> Cc: Danilo Krummrich <d...@kernel.org>
> Cc: David Airlie <airl...@gmail.com>
> Cc: Simona Vetter <sim...@ffwll.ch>
> Cc: Ralph Campbell <rcampb...@nvidia.com>
> Cc: Mika Penttilä <mpent...@redhat.com>
> Cc: Matthew Brost <matthew.br...@intel.com>
> Cc: Francois Dugast <francois.dug...@intel.com>
>
> Signed-off-by: Balbir Singh <balb...@nvidia.com>
> ---
>  mm/migrate_device.c | 95 +++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 95 insertions(+)
>
> diff --git a/mm/migrate_device.c b/mm/migrate_device.c
> index e05e14d6eacd..e58c3f9d01c8 100644
> --- a/mm/migrate_device.c
> +++ b/mm/migrate_device.c
> @@ -54,6 +54,54 @@ static int migrate_vma_collect_hole(unsigned long start,
>  	return 0;
>  }
>
> +/**
> + * migrate_vma_split_folio - Helper function to split a(n) (m)THP folio
> + *
> + * @folio - the folio to split
> + * @fault_page - struct page associated with the fault if any
> + *
> + * Returns 0 on success
> + */
> +static int migrate_vma_split_folio(struct folio *folio,
> +				   struct page *fault_page)
> +{
> +	int ret;
> +	struct folio *fault_folio = fault_page ? page_folio(fault_page) : NULL;
> +	struct folio *new_fault_folio = NULL;
> +
> +	if (folio != fault_folio) {
> +		folio_get(folio);
> +		folio_lock(folio);
> +	}
> +
> +	ret = split_folio(folio);
> +	if (ret) {
> +		if (folio != fault_folio) {
> +			folio_unlock(folio);
> +			folio_put(folio);
> +		}
> +		return ret;
> +	}
> +
> +	new_fault_folio = fault_page ? page_folio(fault_page) : NULL;
> +
> +	/*
> +	 * Ensure the lock is held on the correct
> +	 * folio after the split
> +	 */
> +	if (!new_fault_folio) {
> +		folio_unlock(folio);
> +		folio_put(folio);
> +	} else if (folio != new_fault_folio) {
> +		folio_get(new_fault_folio);
> +		folio_lock(new_fault_folio);
> +		folio_unlock(folio);
> +		folio_put(folio);
> +	}
> +
> +	return 0;
> +}
> +
>  static int migrate_vma_collect_pmd(pmd_t *pmdp,
>  				   unsigned long start,
>  				   unsigned long end,
> @@ -136,6 +184,8 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
>  			 * page table entry. Other special swap entries are not
>  			 * migratable, and we ignore regular swapped page.
>  			 */
> +			struct folio *folio;
> +
>  			entry = pte_to_swp_entry(pte);
>  			if (!is_device_private_entry(entry))
>  				goto next;
> @@ -147,6 +197,29 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
>  			    pgmap->owner != migrate->pgmap_owner)
>  				goto next;
>
> +			folio = page_folio(page);
> +			if (folio_test_large(folio)) {
> +				int ret;
> +
> +				/*
> +				 * The reason for finding pmd present with a
> +				 * large folio for the pte is partial unmaps.
> +				 * Split the folio now for the migration to be
> +				 * handled correctly
> +				 */
> +				pte_unmap_unlock(ptep, ptl);
> +				ret = migrate_vma_split_folio(folio,
> +						migrate->fault_page);
> +
> +				if (ret) {
> +					ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
> +					goto next;
> +				}
> +
> +				addr = start;
> +				goto again;
> +			}
> +
>  			mpfn = migrate_pfn(page_to_pfn(page)) |
>  					MIGRATE_PFN_MIGRATE;
>  			if (is_writable_device_private_entry(entry))
> @@ -171,6 +244,28 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
>  			    pgmap->owner != migrate->pgmap_owner)
>  				goto next;
>  		}
> +		folio = page_folio(page);
> +		if (folio_test_large(folio)) {
> +			int ret;
> +
> +			/*
> +			 * The reason for finding pmd present with a
> +			 * large folio for the pte is partial unmaps.
> +			 * Split the folio now for the migration to be
> +			 * handled correctly
> +			 */

There are other reasons for finding a pte-mapped large folio here besides
partial unmap, e.g. vma splits happening for various reasons (see the
sketch at the end of this mail for one such case), so the comment should
not single out partial unmap.

> +			pte_unmap_unlock(ptep, ptl);
> +			ret = migrate_vma_split_folio(folio,
> +					migrate->fault_page);
> +
> +			if (ret) {
> +				ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
> +				goto next;
> +			}
> +
> +			addr = start;
> +			goto again;
> +		}
>  		mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE;
>  		mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0;
>  	}
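
For one such case, here is a minimal userspace sketch (not part of the patch;
it assumes THP is enabled for anonymous memory and a 2MB huge page size, and
uses an ordinary anonymous THP only for illustration): an mprotect() over part
of a THP-backed range splits the vma and the pmd mapping, so a later
collection walk sees pte-mapped entries backed by a still-large folio even
though nothing was unmapped.

/*
 * Illustration only: change the protection of part of a THP-backed
 * range so the kernel splits the vma and the pmd mapping while the
 * underlying folio stays large.
 */
#include <string.h>
#include <sys/mman.h>

#define SZ_2M	(2UL << 20)

int main(void)
{
	size_t len = 4 * SZ_2M;
	char *raw, *p;

	/* over-map so we can pick a 2MB-aligned start for the THPs */
	raw = mmap(NULL, len + SZ_2M, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (raw == MAP_FAILED)
		return 1;
	p = (char *)(((unsigned long)raw + SZ_2M - 1) & ~(SZ_2M - 1));

	/* ask for THPs and fault the range in */
	madvise(p, len, MADV_HUGEPAGE);
	memset(p, 0xab, len);

	/*
	 * Change protection of one 4K page in the middle of the first
	 * huge page: the vma is split and the pmd is remapped with
	 * ptes, but no part of the folio is unmapped.
	 */
	mprotect(p + SZ_2M / 2, 4096, PROT_READ);

	return 0;
}

Any path that splits the pmd mapping without splitting the folio (for
anonymous THP that is __split_huge_pmd()) leaves this pte-mapped large
folio state behind, so partial unmap is just one of the triggers.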