From: Balbir Singh
Date: Mon, 8 Sep 2025 00:04:35 +0000 (+1000)
Subject: mm/huge_memory: add device-private THP support to PMD operations
X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=de2f90aaf8b4548c815b30726fe8d758b5868e4d;p=users%2Fjedix%2Flinux-maple.git

mm/huge_memory: add device-private THP support to PMD operations

Extend core huge page management functions to handle device-private THP
entries.  This enables proper handling of large device-private folios in
fundamental MM operations.

The following functions have been updated:

- copy_huge_pmd(): Handle device-private entries during fork/clone
- zap_huge_pmd(): Properly free device-private THP during munmap
- change_huge_pmd(): Support protection changes on device-private THP
- __pte_offset_map(): Add device-private entry awareness

Link: https://lkml.kernel.org/r/20250908000448.180088-3-balbirs@nvidia.com
Signed-off-by: Matthew Brost
Signed-off-by: Balbir Singh
Cc: David Hildenbrand
Cc: Zi Yan
Cc: Joshua Hahn
Cc: Rakie Kim
Cc: Byungchul Park
Cc: Gregory Price
Cc: Ying Huang
Cc: Alistair Popple
Cc: Oscar Salvador
Cc: Lorenzo Stoakes
Cc: Baolin Wang
Cc: "Liam R. Howlett"
Cc: Nico Pache
Cc: Ryan Roberts
Cc: Dev Jain
Cc: Barry Song
Cc: Lyude Paul
Cc: Danilo Krummrich
Cc: David Airlie
Cc: Simona Vetter
Cc: Ralph Campbell
Cc: Mika Penttilä
Cc: Matthew Brost
Cc: Francois Dugast
Signed-off-by: Andrew Morton
---

diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 64ea151a7ae3..59c5889a4d54 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -594,6 +594,33 @@ static inline int is_pmd_migration_entry(pmd_t pmd)
 }
 #endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */
 
+#if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_ARCH_ENABLE_THP_MIGRATION)
+
+/**
+ * is_pmd_device_private_entry() - Check if PMD contains a device private swap entry
+ * @pmd: The PMD to check
+ *
+ * Returns true if the PMD contains a swap entry that represents a device private
+ * page mapping. This is used for zone device private pages that have been
+ * swapped out but still need special handling during various memory management
+ * operations.
+ *
+ * Return: 1 if PMD contains device private entry, 0 otherwise
+ */
+static inline int is_pmd_device_private_entry(pmd_t pmd)
+{
+	return is_swap_pmd(pmd) && is_device_private_entry(pmd_to_swp_entry(pmd));
+}
+
+#else /* CONFIG_ZONE_DEVICE && CONFIG_ARCH_ENABLE_THP_MIGRATION */
+
+static inline int is_pmd_device_private_entry(pmd_t pmd)
+{
+	return 0;
+}
+
+#endif /* CONFIG_ZONE_DEVICE && CONFIG_ARCH_ENABLE_THP_MIGRATION */
+
 static inline int non_swap_entry(swp_entry_t entry)
 {
 	return swp_type(entry) >= MAX_SWAPFILES;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 5acca24bbabb..03babad18978 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1703,8 +1703,11 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	if (unlikely(is_swap_pmd(pmd))) {
 		swp_entry_t entry = pmd_to_swp_entry(pmd);
 
-		VM_BUG_ON(!is_pmd_migration_entry(pmd));
-		if (!is_readable_migration_entry(entry)) {
+		VM_WARN_ON(!is_pmd_migration_entry(pmd) &&
+				!is_pmd_device_private_entry(pmd));
+
+		if (is_migration_entry(entry) &&
+		    !is_readable_migration_entry(entry)) {
 			entry = make_readable_migration_entry(
 							swp_offset(entry));
 			pmd = swp_entry_to_pmd(entry);
@@ -1713,7 +1716,37 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 			if (pmd_swp_uffd_wp(*src_pmd))
 				pmd = pmd_swp_mkuffd_wp(pmd);
 			set_pmd_at(src_mm, addr, src_pmd, pmd);
+		} else if (is_device_private_entry(entry)) {
+			/*
+			 * For device private entries, since there are no
+			 * read exclusive entries, writable = !readable
+			 */
+			if (is_writable_device_private_entry(entry)) {
+				entry = make_readable_device_private_entry(
+							swp_offset(entry));
+				pmd = swp_entry_to_pmd(entry);
+
+				if (pmd_swp_soft_dirty(*src_pmd))
+					pmd = pmd_swp_mksoft_dirty(pmd);
+				if (pmd_swp_uffd_wp(*src_pmd))
+					pmd = pmd_swp_mkuffd_wp(pmd);
+				set_pmd_at(src_mm, addr, src_pmd, pmd);
+			}
+
+			src_folio = pfn_swap_entry_folio(entry);
+			VM_WARN_ON(!folio_test_large(src_folio));
+
+			folio_get(src_folio);
+			/*
+			 * folio_try_dup_anon_rmap_pmd does not fail for
+			 * device private entries.
+			 */
+			ret = folio_try_dup_anon_rmap_pmd(src_folio,
+							  &src_folio->page,
+							  dst_vma, src_vma);
+			VM_WARN_ON(ret);
 		}
+
 		add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR);
 		mm_inc_nr_ptes(dst_mm);
 		pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);
@@ -2211,15 +2244,17 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 			folio_remove_rmap_pmd(folio, page, vma);
 			WARN_ON_ONCE(folio_mapcount(folio) < 0);
 			VM_BUG_ON_PAGE(!PageHead(page), page);
-		} else if (thp_migration_supported()) {
+		} else if (is_pmd_migration_entry(orig_pmd) ||
+			   is_pmd_device_private_entry(orig_pmd)) {
 			swp_entry_t entry;
 
-			VM_BUG_ON(!is_pmd_migration_entry(orig_pmd));
 			entry = pmd_to_swp_entry(orig_pmd);
 			folio = pfn_swap_entry_folio(entry);
 			flush_needed = 0;
-		} else
-			WARN_ONCE(1, "Non present huge pmd without pmd migration enabled!");
+
+			if (!thp_migration_supported())
+				WARN_ONCE(1, "Non present huge pmd without pmd migration enabled!");
+		}
 
 		if (folio_test_anon(folio)) {
 			zap_deposited_table(tlb->mm, pmd);
@@ -2239,6 +2274,12 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 				folio_mark_accessed(folio);
 		}
 
+		if (folio_is_device_private(folio)) {
+			folio_remove_rmap_pmd(folio, &folio->page, vma);
+			WARN_ON_ONCE(folio_mapcount(folio) < 0);
+			folio_put(folio);
+		}
+
 		spin_unlock(ptl);
 		if (flush_needed)
 			tlb_remove_page_size(tlb, &folio->page, HPAGE_PMD_SIZE);
@@ -2367,7 +2408,8 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		struct folio *folio = pfn_swap_entry_folio(entry);
 		pmd_t newpmd;
 
-		VM_BUG_ON(!is_pmd_migration_entry(*pmd));
+		VM_WARN_ON(!is_pmd_migration_entry(*pmd) &&
+			   !folio_is_device_private(folio));
 		if (is_writable_migration_entry(entry)) {
 			/*
 			 * A protection check is difficult so
@@ -2380,6 +2422,10 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 			newpmd = swp_entry_to_pmd(entry);
 			if (pmd_swp_soft_dirty(*pmd))
 				newpmd = pmd_swp_mksoft_dirty(newpmd);
+		} else if (is_writable_device_private_entry(entry)) {
+			entry = make_readable_device_private_entry(
+						swp_offset(entry));
+			newpmd = swp_entry_to_pmd(entry);
 		} else {
 			newpmd = *pmd;
 		}
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index 567e2d084071..604e8206a2ec 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -292,6 +292,12 @@ pte_t *___pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp)
 		*pmdvalp = pmdval;
 	if (unlikely(pmd_none(pmdval) || is_pmd_migration_entry(pmdval)))
 		goto nomap;
+	if (is_swap_pmd(pmdval)) {
+		swp_entry_t entry = pmd_to_swp_entry(pmdval);
+
+		if (is_device_private_entry(entry))
+			goto nomap;
+	}
 	if (unlikely(pmd_trans_huge(pmdval)))
 		goto nomap;
 	if (unlikely(pmd_bad(pmdval))) {
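---

A minimal usage sketch, not part of the patch above: it illustrates how a PMD
walker could use the new is_pmd_device_private_entry() helper to tell a large
device-private folio apart from a PMD migration entry, mirroring the branches
added in copy_huge_pmd()/zap_huge_pmd(). The function name
handle_nonpresent_pmd_example() is hypothetical and the comments only gesture
at the real handling done in the diff.

#include <linux/mm.h>
#include <linux/swapops.h>

/* Sketch only: assumes @pmd was read under the PMD lock. */
static void handle_nonpresent_pmd_example(pmd_t pmd)
{
	if (!is_swap_pmd(pmd))
		return;		/* present or none: no swap-style entry to decode */

	if (is_pmd_migration_entry(pmd)) {
		/* migration in progress: callers typically wait and retry */
		return;
	}

	if (is_pmd_device_private_entry(pmd)) {
		swp_entry_t entry = pmd_to_swp_entry(pmd);
		struct folio *folio = pfn_swap_entry_folio(entry);

		/* device-private THP: entry maps a large folio, as checked above */
		VM_WARN_ON(!folio_test_large(folio));
	}
}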