mm/huge_memory: add vmf_insert_folio_pmd()
author Alistair Popple <apopple@nvidia.com>
Tue, 18 Feb 2025 03:55:32 +0000 (14:55 +1100)
committer Andrew Morton <akpm@linux-foundation.org>
Fri, 28 Feb 2025 01:00:15 +0000 (17:00 -0800)
Currently DAX folio/page reference counts are managed differently from
those of normal pages.  To allow them to be managed in the same way,
introduce vmf_insert_folio_pmd().  This maps the entire PMD-sized folio
and takes references on it just as for a normally mapped page.

This is distinct from the current mechanism, vmf_insert_pfn_pmd(), which
simply inserts a special devmap PMD entry into the page table without
holding a reference to the page for the mapping.
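
As an illustration only (not part of this patch), a PMD fault handler in a
DAX-style driver could use the new helper roughly as follows; the folio
lookup helper example_grab_pmd_folio() is invented for the sketch:

  /*
   * Hypothetical caller: map a PMD-sized folio at a huge fault.
   * example_grab_pmd_folio() is an assumed driver-side lookup that
   * returns the PMD_ORDER folio backing vmf->address, or NULL.
   */
  static vm_fault_t example_huge_fault_pmd(struct vm_fault *vmf, bool write)
  {
          struct folio *folio = example_grab_pmd_folio(vmf);

          if (!folio)
                  return VM_FAULT_FALLBACK;

          /*
           * Unlike vmf_insert_pfn_pmd(), this takes a folio reference
           * and adds an rmap entry covering the whole PMD mapping.
           */
          return vmf_insert_folio_pmd(vmf, folio, write);
  }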

It is not currently useful to implement a more generic vmf_insert_folio()
which selects the correct behaviour based on folio_order().  This is
because PTE faults require only a subpage of the folio to be PTE-mapped
rather than the entire folio.  It would be possible to add this context
somewhere, but callers already need to handle PTE faults and PMD faults
separately, so a more generic function would offer little benefit, as the
sketch below illustrates.
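
An assumption-laden sketch, not code from this series: a ->huge_fault-style
handler already knows the fault order, so it can call the size-specific
helper directly; example_lookup_folio() is made up for the example:

  /*
   * Hypothetical huge-fault handler: the caller already dispatches on the
   * fault size, so the PMD and PUD helpers are called explicitly and a
   * folio_order()-based vmf_insert_folio() would remove no branching.
   */
  static vm_fault_t example_huge_fault(struct vm_fault *vmf, unsigned int order)
  {
          bool write = vmf->flags & FAULT_FLAG_WRITE;
          struct folio *folio = example_lookup_folio(vmf, order); /* assumed */

          if (!folio)
                  return VM_FAULT_FALLBACK;

          if (order == PMD_ORDER)
                  return vmf_insert_folio_pmd(vmf, folio, write);
          if (order == PUD_ORDER)
                  return vmf_insert_folio_pud(vmf, folio, write);

          /*
           * PTE faults map only a subpage of the folio and are handled
           * by the ordinary PTE fault path instead.
           */
          return VM_FAULT_FALLBACK;
  }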

Link: https://lkml.kernel.org/r/16b232f09b2e678ef5c21eb74f21794a06422ce4.1739850794.git-series.apopple@nvidia.com
Signed-off-by: Alistair Popple <apopple@nvidia.com>
Acked-by: David Hildenbrand <david@redhat.com>
Tested-by: Alison Schofield <alison.schofield@intel.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Asahi Lina <lina@asahilina.net>
Cc: Balbir Singh <balbirs@nvidia.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Chunyan Zhang <zhang.lyra@gmail.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: "Darrick J. Wong" <djwong@kernel.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Logan Gunthorpe <logang@deltatee.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Ted Ts'o <tytso@mit.edu>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
include/linux/huge_mm.h
mm/huge_memory.c

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 4eeb54b5062165d073adc6f771e408bd9eaa8cd3..e57e811cfd3ccabbb11b1431c676b9132de9a952 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -39,6 +39,8 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 
 vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write);
 vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write);
+vm_fault_t vmf_insert_folio_pmd(struct vm_fault *vmf, struct folio *folio,
+                               bool write);
 vm_fault_t vmf_insert_folio_pud(struct vm_fault *vmf, struct folio *folio,
                                bool write);
 
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index bc1deccbfee0ce724ed85e8b7fd9339bc43318f4..03848c9ab7738d6669fee64704708e5a703cfaa9 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1375,20 +1375,20 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
        return __do_huge_pmd_anonymous_page(vmf);
 }
 
-static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
+static int insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
                pmd_t *pmd, pfn_t pfn, pgprot_t prot, bool write,
                pgtable_t pgtable)
 {
        struct mm_struct *mm = vma->vm_mm;
        pmd_t entry;
-       spinlock_t *ptl;
 
-       ptl = pmd_lock(mm, pmd);
+       lockdep_assert_held(pmd_lockptr(mm, pmd));
+
        if (!pmd_none(*pmd)) {
                if (write) {
                        if (pmd_pfn(*pmd) != pfn_t_to_pfn(pfn)) {
                                WARN_ON_ONCE(!is_huge_zero_pmd(*pmd));
-                               goto out_unlock;
+                               return -EEXIST;
                        }
                        entry = pmd_mkyoung(*pmd);
                        entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
@@ -1396,7 +1396,7 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
                                update_mmu_cache_pmd(vma, addr, pmd);
                }
 
-               goto out_unlock;
+               return -EEXIST;
        }
 
        entry = pmd_mkhuge(pfn_t_pmd(pfn, prot));
@@ -1412,16 +1412,11 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
        if (pgtable) {
                pgtable_trans_huge_deposit(mm, pmd, pgtable);
                mm_inc_nr_ptes(mm);
-               pgtable = NULL;
        }
 
        set_pmd_at(mm, addr, pmd, entry);
        update_mmu_cache_pmd(vma, addr, pmd);
-
-out_unlock:
-       spin_unlock(ptl);
-       if (pgtable)
-               pte_free(mm, pgtable);
+       return 0;
 }
 
 /**
@@ -1440,6 +1435,8 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write)
        struct vm_area_struct *vma = vmf->vma;
        pgprot_t pgprot = vma->vm_page_prot;
        pgtable_t pgtable = NULL;
+       spinlock_t *ptl;
+       int error;
 
        /*
         * If we had pmd_special, we could avoid all these restrictions,
@@ -1462,12 +1459,56 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write)
        }
 
        track_pfn_insert(vma, &pgprot, pfn);
+       ptl = pmd_lock(vma->vm_mm, vmf->pmd);
+       error = insert_pfn_pmd(vma, addr, vmf->pmd, pfn, pgprot, write,
+                       pgtable);
+       spin_unlock(ptl);
+       if (error && pgtable)
+               pte_free(vma->vm_mm, pgtable);
 
-       insert_pfn_pmd(vma, addr, vmf->pmd, pfn, pgprot, write, pgtable);
        return VM_FAULT_NOPAGE;
 }
 EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd);
 
+vm_fault_t vmf_insert_folio_pmd(struct vm_fault *vmf, struct folio *folio,
+                               bool write)
+{
+       struct vm_area_struct *vma = vmf->vma;
+       unsigned long addr = vmf->address & PMD_MASK;
+       struct mm_struct *mm = vma->vm_mm;
+       spinlock_t *ptl;
+       pgtable_t pgtable = NULL;
+       int error;
+
+       if (addr < vma->vm_start || addr >= vma->vm_end)
+               return VM_FAULT_SIGBUS;
+
+       if (WARN_ON_ONCE(folio_order(folio) != PMD_ORDER))
+               return VM_FAULT_SIGBUS;
+
+       if (arch_needs_pgtable_deposit()) {
+               pgtable = pte_alloc_one(vma->vm_mm);
+               if (!pgtable)
+                       return VM_FAULT_OOM;
+       }
+
+       ptl = pmd_lock(mm, vmf->pmd);
+       if (pmd_none(*vmf->pmd)) {
+               folio_get(folio);
+               folio_add_file_rmap_pmd(folio, &folio->page, vma);
+               add_mm_counter(mm, mm_counter_file(folio), HPAGE_PMD_NR);
+       }
+       error = insert_pfn_pmd(vma, addr, vmf->pmd,
+                       pfn_to_pfn_t(folio_pfn(folio)), vma->vm_page_prot,
+                       write, pgtable);
+       spin_unlock(ptl);
+       if (error && pgtable)
+               pte_free(mm, pgtable);
+
+       return VM_FAULT_NOPAGE;
+}
+EXPORT_SYMBOL_GPL(vmf_insert_folio_pmd);
+
 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
 static pud_t maybe_pud_mkwrite(pud_t pud, struct vm_area_struct *vma)
 {