hugetlb: take PMD sharing into account when flushing tlb/caches

author Mike Kravetz <mike.kravetz@oracle.com>

Thu, 30 Aug 2018 23:27:48 +0000 (16:27 -0700)

committer Brian Maly <brian.maly@oracle.com>

Mon, 24 Dec 2018 02:16:26 +0000 (21:16 -0500)
author Mike Kravetz <mike.kravetz@oracle.com>
Thu, 30 Aug 2018 23:27:48 +0000 (16:27 -0700)
committer Brian Maly <brian.maly@oracle.com>
Mon, 24 Dec 2018 02:16:26 +0000 (21:16 -0500)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c

index baeed4157a6b7cc28e021e6c09822c213f3f1e31..3fe5b38a59c09bf42f52140365b8a13156dade68 100644 (file)
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3217,14 +3217,19 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
         struct page *page;
         struct hstate *h = hstate_vma(vma);
         unsigned long sz = huge_page_size(h);
-       const unsigned long mmun_start = start; /* For mmu_notifiers */
-       const unsigned long mmun_end   = end;   /* For mmu_notifiers */
+       unsigned long mmun_start = start;       /* For mmu_notifiers */
+       unsigned long mmun_end   = end;         /* For mmu_notifiers */
  
         WARN_ON(!is_vm_hugetlb_page(vma));
         BUG_ON(start & ~huge_page_mask(h));
         BUG_ON(end & ~huge_page_mask(h));
  
         tlb_start_vma(tlb, vma);
+
+       /*
+        * If PMD sharing is possible, alert mmu notifiers of the worst case.
+        */
+       adjust_range_if_pmd_sharing_possible(vma, &mmun_start, &mmun_end);
         mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
         address = start;
  again:
@@ -3235,6 +3240,10 @@ again:
  
                 ptl = huge_pte_lock(h, mm, ptep);
                 if (huge_pmd_unshare(mm, &address, ptep))
+                       /*
+                        * We just unmapped a page of PMDs by clearing a PUD.
+                        * The caller's TLB flush range should cover this area.
+                        */
                         goto unlock;
  
                 pte = huge_ptep_get(ptep);
@@ -3327,12 +3336,22 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
  {
         struct mm_struct *mm;
         struct mmu_gather tlb;
+       unsigned long tlb_start = start;
+       unsigned long tlb_end = end;
  
+       /*
+        * If shared PMDs were possibly used within this vma range, adjust
+        * start/end for worst case tlb flushing.
+        * Note that we cannot be sure whether PMDs are shared until we try to
+        * unmap pages.  However, we want to make sure TLB flushing covers
+        * the largest possible range.
+        */
+       adjust_range_if_pmd_sharing_possible(vma, &tlb_start, &tlb_end);
         mm = vma->vm_mm;
  
-       tlb_gather_mmu(&tlb, mm, start, end);
+       tlb_gather_mmu(&tlb, mm, tlb_start, tlb_end);
         __unmap_hugepage_range(&tlb, vma, start, end, ref_page);
-       tlb_finish_mmu(&tlb, start, end);
+       tlb_finish_mmu(&tlb, tlb_start, tlb_end);
  }
  
  /*
@@ -4153,11 +4172,21 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
         pte_t pte;
         struct hstate *h = hstate_vma(vma);
         unsigned long pages = 0;
+       unsigned long f_start = start;
+       unsigned long f_end = end;
+       bool shared_pmd = false;
+
+       /*
+        * In the case of shared PMDs, the area to flush could be beyond
+        * start/end.  Set f_start/f_end to cover the maximum possible
+        * range if PMD sharing is possible.
+        */
+       adjust_range_if_pmd_sharing_possible(vma, &f_start, &f_end);
  
         BUG_ON(address >= end);
-       flush_cache_range(vma, address, end);
+       flush_cache_range(vma, f_start, f_end);
  
-       mmu_notifier_invalidate_range_start(mm, start, end);
+       mmu_notifier_invalidate_range_start(mm, f_start, f_end);
         i_mmap_lock_write(vma->vm_file->f_mapping);
         for (; address < end; address += huge_page_size(h)) {
                 spinlock_t *ptl;
@@ -4168,6 +4197,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
                 if (huge_pmd_unshare(mm, &address, ptep)) {
                         pages++;
                         spin_unlock(ptl);
+                       shared_pmd = true;
                         continue;
                 }
                 pte = huge_ptep_get(ptep);
@@ -4202,12 +4232,16 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
          * Must flush TLB before releasing i_mmap_rwsem: x86's huge_pmd_unshare
          * may have cleared our pud entry and done put_page on the page table:
          * once we release i_mmap_rwsem, another task can do the final put_page
-        * and that page table be reused and filled with junk.
+        * and that page table be reused and filled with junk.  If we actually
+        * did unshare a page of pmds, flush the range corresponding to the pud.
          */
-       flush_tlb_range(vma, start, end);
-       mmu_notifier_invalidate_range(mm, start, end);
+       if (shared_pmd)
+               flush_tlb_range(vma, f_start, f_end);
+       else
+               flush_tlb_range(vma, start, end);
+       mmu_notifier_invalidate_range(mm, f_start, f_end);
         i_mmap_unlock_write(vma->vm_file->f_mapping);
-       mmu_notifier_invalidate_range_end(mm, start, end);
+       mmu_notifier_invalidate_range_end(mm, f_start, f_end);
  
         return pages << h->order;
  }
author	Mike Kravetz <mike.kravetz@oracle.com>
	Thu, 30 Aug 2018 23:27:48 +0000 (16:27 -0700)
committer	Brian Maly <brian.maly@oracle.com>
	Mon, 24 Dec 2018 02:16:26 +0000 (21:16 -0500)