From: Baolin Wang Date: Tue, 10 May 2022 01:20:53 +0000 (-0700) Subject: mm: rmap: move the cache flushing to the correct place for hugetlb PMD sharing X-Git-Tag: v5.19-rc1~138^2~191 X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=54205e9c5425049aef1bc7a812f890f00b5f79c7;p=users%2Fhch%2Fxfs.git mm: rmap: move the cache flushing to the correct place for hugetlb PMD sharing The cache level flush will always be first when changing an existing virtual–>physical mapping to a new value, since this allows us to properly handle systems whose caches are strict and require a virtual–>physical translation to exist for a virtual address. So we should move the cache flushing before huge_pmd_unshare(). As Muchun pointed out[1], now the architectures whose supporting hugetlb PMD sharing have no cache flush issues in practice. But I think we should still follow the cache/TLB flushing rules when changing a valid virtual address mapping in case of potential issues in future. [1] https://lore.kernel.org/all/YmT%2F%2FhuUbFX+KHcy@FVFYT0MHHV2J.usts.net/ Link: https://lkml.kernel.org/r/4f7ae6dfdc838ab71e1655188b657c032ff1f28f.1651056365.git.baolin.wang@linux.alibaba.com Signed-off-by: Baolin Wang Reviewed-by: Mike Kravetz Cc: Mina Almasry Cc: Muchun Song Signed-off-by: Andrew Morton --- diff --git a/mm/rmap.c b/mm/rmap.c index 86ed2865210d..829b2f9486c6 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1532,15 +1532,16 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, * do this outside rmap routines. */ VM_BUG_ON(!(flags & TTU_RMAP_LOCKED)); + /* + * huge_pmd_unshare may unmap an entire PMD page. + * There is no way of knowing exactly which PMDs may + * be cached for this mm, so we must flush them all. + * start/end were already adjusted above to cover this + * range. + */ + flush_cache_range(vma, range.start, range.end); + if (huge_pmd_unshare(mm, vma, &address, pvmw.pte)) { - /* - * huge_pmd_unshare unmapped an entire PMD - * page. There is no way of knowing exactly - * which PMDs may be cached for this mm, so - * we must flush them all. start/end were - * already adjusted above to cover this range. - */ - flush_cache_range(vma, range.start, range.end); flush_tlb_range(vma, range.start, range.end); mmu_notifier_invalidate_range(mm, range.start, range.end); @@ -1557,13 +1558,14 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, page_vma_mapped_walk_done(&pvmw); break; } + } else { + flush_cache_page(vma, address, pte_pfn(*pvmw.pte)); } /* * Nuke the page table entry. When having to clear * PageAnonExclusive(), we always have to flush. */ - flush_cache_page(vma, address, pte_pfn(*pvmw.pte)); if (should_defer_flush(mm, flags) && !anon_exclusive) { /* * We clear the PTE but do not flush so potentially @@ -1884,15 +1886,16 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, * do this outside rmap routines. */ VM_BUG_ON(!(flags & TTU_RMAP_LOCKED)); + /* + * huge_pmd_unshare may unmap an entire PMD page. + * There is no way of knowing exactly which PMDs may + * be cached for this mm, so we must flush them all. + * start/end were already adjusted above to cover this + * range. + */ + flush_cache_range(vma, range.start, range.end); + if (huge_pmd_unshare(mm, vma, &address, pvmw.pte)) { - /* - * huge_pmd_unshare unmapped an entire PMD - * page. There is no way of knowing exactly - * which PMDs may be cached for this mm, so - * we must flush them all. start/end were - * already adjusted above to cover this range. - */ - flush_cache_range(vma, range.start, range.end); flush_tlb_range(vma, range.start, range.end); mmu_notifier_invalidate_range(mm, range.start, range.end); @@ -1909,10 +1912,11 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, page_vma_mapped_walk_done(&pvmw); break; } + } else { + flush_cache_page(vma, address, pte_pfn(*pvmw.pte)); } /* Nuke the page table entry. */ - flush_cache_page(vma, address, pte_pfn(*pvmw.pte)); pteval = ptep_clear_flush(vma, address, pvmw.pte); /* Set the dirty flag on the folio now the pte is gone. */