www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
mm/memory/fault: add THP fault handling for zone device private pages
author: Balbir Singh <balbirs@nvidia.com>
Mon, 8 Sep 2025 00:04:40 +0000 (10:04 +1000)
committer: Andrew Morton <akpm@linux-foundation.org>
Fri, 12 Sep 2025 00:26:06 +0000 (17:26 -0700)
Implement CPU fault handling for zone device THP entries through
do_huge_pmd_device_private(), enabling transparent migration of
device-private large pages back to system memory on CPU access.

When the CPU accesses a zone device THP entry, the fault handler calls the
device driver's migrate_to_ram() callback to migrate the entire large page
back to system memory.

Link: https://lkml.kernel.org/r/20250908000448.180088-8-balbirs@nvidia.com
Signed-off-by: Balbir Singh <balbirs@nvidia.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: Joshua Hahn <joshua.hahnjy@gmail.com>
Cc: Rakie Kim <rakie.kim@sk.com>
Cc: Byungchul Park <byungchul@sk.com>
Cc: Gregory Price <gourry@gourry.net>
Cc: Ying Huang <ying.huang@linux.alibaba.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Cc: Nico Pache <npache@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Lyude Paul <lyude@redhat.com>
Cc: Danilo Krummrich <dakr@kernel.org>
Cc: David Airlie <airlied@gmail.com>
Cc: Simona Vetter <simona@ffwll.ch>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Mika Penttilä <mpenttil@redhat.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
include/linux/huge_mm.h
mm/huge_memory.c
mm/memory.c

index f327d62fc9852d3cf717e633ed9b39deecfa0f20..2d669be7f1c800b607eb2ff58cb1cd8ab3c84283 100644 (file)
@@ -496,6 +496,8 @@ static inline bool folio_test_pmd_mappable(struct folio *folio)
 
 vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf);
 
+vm_fault_t do_huge_pmd_device_private(struct vm_fault *vmf);
+
 extern struct folio *huge_zero_folio;
 extern unsigned long huge_zero_pfn;
 
@@ -671,6 +673,11 @@ static inline vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
        return 0;
 }
 
+/*
+ * Stub for configurations without huge-PMD support (presumably the
+ * !CONFIG_TRANSPARENT_HUGEPAGE branch of huge_mm.h -- the #ifdef is not
+ * visible in this hunk): no device-private huge PMD can exist, so report
+ * the fault as handled.
+ */
+static inline vm_fault_t do_huge_pmd_device_private(struct vm_fault *vmf)
+{
+       return 0;
+}
+
 static inline bool is_huge_zero_folio(const struct folio *folio)
 {
        return false;
index f059ff4ec70729413783d997d51fb405528af196..094447965171e27e6ee1cf1ba37c2a0b585c23c3 100644 (file)
@@ -1287,6 +1287,42 @@ release:
 
 }
 
+/*
+ * do_huge_pmd_device_private - handle a CPU fault on a device-private
+ * huge PMD entry.
+ *
+ * Invokes the owning driver's migrate_to_ram() callback (via the page's
+ * pgmap ops) so the device-private large page is migrated back to system
+ * memory.  Returns 0 when there is nothing to do (racing PMD change, or
+ * the page lock could not be taken), VM_FAULT_RETRY for the VMA-locked
+ * path, or whatever migrate_to_ram() returns.
+ */
+vm_fault_t do_huge_pmd_device_private(struct vm_fault *vmf)
+{
+       struct vm_area_struct *vma = vmf->vma;
+       vm_fault_t ret = 0;
+       spinlock_t *ptl;
+       swp_entry_t swp_entry;
+       struct page *page;
+
+       /*
+        * Bail out of the per-VMA-lock fast path and let the fault be
+        * retried under mmap_lock -- NOTE(review): presumably because
+        * migrate_to_ram() may need the full mmap_lock; confirm against
+        * the equivalent PTE-level handler.
+        */
+       if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
+               vma_end_read(vma);
+               return VM_FAULT_RETRY;
+       }
+
+       ptl = pmd_lock(vma->vm_mm, vmf->pmd);
+       /* Recheck under the PMD lock: a racing fault may have already
+        * changed the entry, in which case there is nothing left to do. */
+       if (unlikely(!pmd_same(*vmf->pmd, vmf->orig_pmd))) {
+               spin_unlock(ptl);
+               return 0;
+       }
+
+       swp_entry = pmd_to_swp_entry(vmf->orig_pmd);
+       page = pfn_swap_entry_to_page(swp_entry);
+       vmf->page = page;
+       vmf->pte = NULL;
+       /*
+        * trylock, not lock: we hold the PMD spinlock, so sleeping on the
+        * page lock here is not allowed.  On failure just drop the lock
+        * and return 0 -- the fault will be retried.
+        */
+       if (trylock_page(vmf->page)) {
+               /* Pin the page before dropping the PMD lock so it cannot
+                * be freed while migrate_to_ram() runs. */
+               get_page(page);
+               spin_unlock(ptl);
+               ret = page_pgmap(page)->ops->migrate_to_ram(vmf);
+               unlock_page(vmf->page);
+               put_page(page);
+       } else {
+               spin_unlock(ptl);
+       }
+
+       return ret;
+}
+
 /*
  * always: directly stall for all thp allocations
  * defer: wake kswapd and fail if not immediately available
index 9ec83417affc4215f7dae80b3eef07142a8bddfe..3e0404bd57a02c702410b50cc826ab3d0c3d61c2 100644 (file)
@@ -6303,8 +6303,10 @@ retry_pud:
                vmf.orig_pmd = pmdp_get_lockless(vmf.pmd);
 
                if (unlikely(is_swap_pmd(vmf.orig_pmd))) {
-                       VM_BUG_ON(thp_migration_supported() &&
-                                         !is_pmd_migration_entry(vmf.orig_pmd));
+                       if (is_device_private_entry(
+                                       pmd_to_swp_entry(vmf.orig_pmd)))
+                               return do_huge_pmd_device_private(&vmf);
+
                        if (is_pmd_migration_entry(vmf.orig_pmd))
                                pmd_migration_entry_wait(mm, vmf.pmd);
                        return 0;