www.infradead.org Git - nvme.git/commitdiff
fs/proc/task_mmu: account non-present entries as "maybe shared, but no idea how often"
author: David Hildenbrand <david@redhat.com>
Fri, 7 Jun 2024 12:23:55 +0000 (14:23 +0200)
committer: Andrew Morton <akpm@linux-foundation.org>
Thu, 4 Jul 2024 02:30:06 +0000 (19:30 -0700)
We currently rely on mapcount information for pages referenced by
non-present entries to calculate the USS (shared vs.  private) and the
PSS.

However, relying on mapcounts for non-present entries doesn't make any
sense.  We have to treat such entries as "maybe shared, but no idea how
often", implying that they will *not* get accounted towards the USS, and
will get fully accounted to the PSS (no idea how often shared).

There is one exception: device private entries essentially behave like
present entries (e.g., mapcount incremented).

In smaps_pmd_entry(), use is_pfn_swap_entry() instead of
is_migration_entry(), which should not make a real difference but makes
the code look more similar to the PTE variant.

While at it, adjust the comments in smaps_account().

Link: https://lkml.kernel.org/r/20240607122357.115423-5-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Lance Yang <ioworker0@gmail.com>
Cc: Oscar Salvador <osalvador@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
fs/proc/task_mmu.c

index a45f2da0ada0d0caf386b2c0e62f15ab08772c18..0399e55dcb7d71000f5c78a8f214b7f4c7369cec 100644 (file)
@@ -442,7 +442,7 @@ static void smaps_page_accumulate(struct mem_size_stats *mss,
 
 static void smaps_account(struct mem_size_stats *mss, struct page *page,
                bool compound, bool young, bool dirty, bool locked,
-               bool migration)
+               bool present)
 {
        struct folio *folio = page_folio(page);
        int i, nr = compound ? compound_nr(page) : 1;
@@ -471,22 +471,27 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
         * Then accumulate quantities that may depend on sharing, or that may
         * differ page-by-page.
         *
-        * refcount == 1 guarantees the page is mapped exactly once.
-        * If any subpage of the compound page mapped with PTE it would elevate
-        * the refcount.
+        * refcount == 1 for present entries guarantees that the folio is mapped
+        * exactly once. For large folios this implies that exactly one
+        * PTE/PMD/... maps (a part of) this folio.
         *
-        * The page_mapcount() is called to get a snapshot of the mapcount.
-        * Without holding the page lock this snapshot can be slightly wrong as
-        * we cannot always read the mapcount atomically.  It is not safe to
-        * call page_mapcount() even with PTL held if the page is not mapped,
-        * especially for migration entries.  Treat regular migration entries
-        * as mapcount == 1.
+        * Treat all non-present entries (where relying on the mapcount and
+        * refcount doesn't make sense) as "maybe shared, but not sure how
+        * often". We treat device private entries as being fake-present.
+        *
+        * Note that it would not be safe to read the mapcount especially for
+        * pages referenced by migration entries, even with the PTL held.
         */
-       if ((folio_ref_count(folio) == 1) || migration) {
+       if (folio_ref_count(folio) == 1 || !present) {
                smaps_page_accumulate(mss, folio, size, size << PSS_SHIFT,
-                               dirty, locked, true);
+                                     dirty, locked, present);
                return;
        }
+       /*
+        * The page_mapcount() is called to get a snapshot of the mapcount.
+        * Without holding the folio lock this snapshot can be slightly wrong as
+        * we cannot always read the mapcount atomically.
+        */
        for (i = 0; i < nr; i++, page++) {
                int mapcount = page_mapcount(page);
                unsigned long pss = PAGE_SIZE << PSS_SHIFT;
@@ -531,13 +536,14 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
        struct vm_area_struct *vma = walk->vma;
        bool locked = !!(vma->vm_flags & VM_LOCKED);
        struct page *page = NULL;
-       bool migration = false, young = false, dirty = false;
+       bool present = false, young = false, dirty = false;
        pte_t ptent = ptep_get(pte);
 
        if (pte_present(ptent)) {
                page = vm_normal_page(vma, addr, ptent);
                young = pte_young(ptent);
                dirty = pte_dirty(ptent);
+               present = true;
        } else if (is_swap_pte(ptent)) {
                swp_entry_t swpent = pte_to_swp_entry(ptent);
 
@@ -555,8 +561,8 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
                                mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT;
                        }
                } else if (is_pfn_swap_entry(swpent)) {
-                       if (is_migration_entry(swpent))
-                               migration = true;
+                       if (is_device_private_entry(swpent))
+                               present = true;
                        page = pfn_swap_entry_to_page(swpent);
                }
        } else {
@@ -567,7 +573,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
        if (!page)
                return;
 
-       smaps_account(mss, page, false, young, dirty, locked, migration);
+       smaps_account(mss, page, false, young, dirty, locked, present);
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -578,18 +584,17 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
        struct vm_area_struct *vma = walk->vma;
        bool locked = !!(vma->vm_flags & VM_LOCKED);
        struct page *page = NULL;
+       bool present = false;
        struct folio *folio;
-       bool migration = false;
 
        if (pmd_present(*pmd)) {
                page = vm_normal_page_pmd(vma, addr, *pmd);
+               present = true;
        } else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) {
                swp_entry_t entry = pmd_to_swp_entry(*pmd);
 
-               if (is_migration_entry(entry)) {
-                       migration = true;
+               if (is_pfn_swap_entry(entry))
                        page = pfn_swap_entry_to_page(entry);
-               }
        }
        if (IS_ERR_OR_NULL(page))
                return;
@@ -604,7 +609,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
                mss->file_thp += HPAGE_PMD_SIZE;
 
        smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd),
-                     locked, migration);
+                     locked, present);
 }
 #else
 static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
@@ -735,17 +740,21 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
        struct vm_area_struct *vma = walk->vma;
        pte_t ptent = huge_ptep_get(pte);
        struct folio *folio = NULL;
+       bool present = false;
 
        if (pte_present(ptent)) {
                folio = page_folio(pte_page(ptent));
+               present = true;
        } else if (is_swap_pte(ptent)) {
                swp_entry_t swpent = pte_to_swp_entry(ptent);
 
                if (is_pfn_swap_entry(swpent))
                        folio = pfn_swap_entry_folio(swpent);
        }
+
        if (folio) {
-               if (folio_likely_mapped_shared(folio) ||
+               /* We treat non-present entries as "maybe shared". */
+               if (!present || folio_likely_mapped_shared(folio) ||
                    hugetlb_pmd_shared(pte))
                        mss->shared_hugetlb += huge_page_size(hstate_vma(vma));
                else