From 431da2d5f52e2a7b883e943cdf9c2cd85fb9b551 Mon Sep 17 00:00:00 2001 From: Jane Chu Date: Mon, 24 Feb 2025 14:14:45 -0700 Subject: [PATCH] mm: make page_mapped_in_vma() hugetlb walk aware When a process consumes a UE in a page, the memory failure handler attempts to collect information for a potential SIGBUS. If the page is an anonymous page, page_mapped_in_vma(page, vma) is invoked in order to 1. retrieve the vaddr from the process' address space, 2. verify that the vaddr is indeed mapped to the poisoned page, where 'page' is the precise small page with UE. It's been observed that when injecting poison to a non-head subpage of an anonymous hugetlb page, no SIGBUS shows up, while injecting to the head page produces a SIGBUS. The cause is that, though hugetlb_walk() returns a valid pmd entry (on x86), but check_pte() detects mismatch between the head page per the pmd and the input subpage. Thus the vaddr is considered not mapped to the subpage and the process is not collected for SIGBUS purpose. This is the calling stack: collect_procs_anon page_mapped_in_vma page_vma_mapped_walk hugetlb_walk huge_pte_lock check_pte check_pte() header says that it "check if [pvmw->pfn, @pvmw->pfn + @pvmw->nr_pages) is mapped at the @pvmw->pte" but practically works only if pvmw->pfn is the head page pfn at pvmw->pte. Hindsight acknowledging that some pvmw->pte could point to a hugepage of some sort such that it makes sense to make check_pte() work for hugepage. Link: https://lkml.kernel.org/r/20250224211445.2663312-1-jane.chu@oracle.com Signed-off-by: Jane Chu Cc: Hugh Dickins Cc: Kirill A. Shuemov Cc: linmiaohe Cc: Matthew Wilcow (Oracle) Cc: Peter Xu Cc: Signed-off-by: Andrew Morton --- mm/page_vma_mapped.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index 32679be22d30..e463c3be934a 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -84,6 +84,7 @@ again: * mapped at the @pvmw->pte * @pvmw: page_vma_mapped_walk struct, includes a pair pte and pfn range * for checking + * @pte_nr: the number of small pages described by @pvmw->pte. * * page_vma_mapped_walk() found a place where pfn range is *potentially* * mapped. check_pte() has to validate this. @@ -100,7 +101,7 @@ again: * Otherwise, return false. * */ -static bool check_pte(struct page_vma_mapped_walk *pvmw) +static bool check_pte(struct page_vma_mapped_walk *pvmw, unsigned long pte_nr) { unsigned long pfn; pte_t ptent = ptep_get(pvmw->pte); @@ -132,7 +133,11 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw) pfn = pte_pfn(ptent); } - return (pfn - pvmw->pfn) < pvmw->nr_pages; + if ((pfn + pte_nr - 1) < pvmw->pfn) + return false; + if (pfn > (pvmw->pfn + pvmw->nr_pages - 1)) + return false; + return true; } /* Returns true if the two ranges overlap. Careful to not overflow. */ @@ -207,7 +212,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) return false; pvmw->ptl = huge_pte_lock(hstate, mm, pvmw->pte); - if (!check_pte(pvmw)) + if (!check_pte(pvmw, pages_per_huge_page(hstate))) return not_found(pvmw); return true; } @@ -290,7 +295,7 @@ restart: goto next_pte; } this_pte: - if (check_pte(pvmw)) + if (check_pte(pvmw, 1)) return true; next_pte: do { -- 2.49.0