mm: migration: fix migration of huge PMD shared pages

author Mike Kravetz <mike.kravetz@oracle.com>

Wed, 21 Nov 2018 01:20:31 +0000 (17:20 -0800)

committer Brian Maly <brian.maly@oracle.com>

Mon, 24 Dec 2018 02:16:13 +0000 (21:16 -0500)
author Mike Kravetz <mike.kravetz@oracle.com>
Wed, 21 Nov 2018 01:20:31 +0000 (17:20 -0800)
committer Brian Maly <brian.maly@oracle.com>
Mon, 24 Dec 2018 02:16:13 +0000 (21:16 -0500)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h

index 5992a044842d318903af0a3c617c3fef94141bd2..03446d3709961f756e45895da0b1d6f9f1278015 100644 (file)
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -116,6 +116,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
                         unsigned long addr, unsigned long sz);
  pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr);
  int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep);
+void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
+                               unsigned long *start, unsigned long *end);
  struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
                               int write);
  struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
@@ -138,6 +140,18 @@ static inline unsigned long hugetlb_total_pages(void)
         return 0;
  }
  
+/*
+ * No-op variant of huge_pmd_unshare(): nothing is unshared, *addr is left
+ * untouched and 0 is always returned.
+ * NOTE(review): presumably the stub for builds without hugetlb support;
+ * the guarding #ifdef is outside this hunk - confirm.
+ */
+static inline int huge_pmd_unshare(struct mm_struct *mm,
+                                  unsigned long *addr, pte_t *ptep)
+{
+       return 0;
+}
+
+/*
+ * No-op variant of adjust_range_if_pmd_sharing_possible(): *start and *end
+ * are never modified.
+ * NOTE(review): presumably the stub for builds without hugetlb support;
+ * the guarding #ifdef is outside this hunk - confirm.
+ */
+static inline void
+adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
+                                    unsigned long *start, unsigned long *end)
+{
+}
+
  #define follow_hugetlb_page(m,v,p,vs,a,b,i,w,n)        ({ BUG(); 0; })
  #define follow_huge_addr(mm, addr, write)      ERR_PTR(-EINVAL)
  #define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; })
diff --git a/include/linux/mm.h b/include/linux/mm.h

index 3d9952e28cd4191dc94eadedc30bed9c318760d7..5befc80071edb0dac313f55817a0a17ad1d65559 100644 (file)
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2073,6 +2073,12 @@ static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm,
         return vma;
  }
  
+/*
+ * Tell whether the address range delimited by @start and @end fits inside
+ * @vma, i.e. @start is not below vma->vm_start and @end does not exceed
+ * vma->vm_end.  A NULL @vma contains no range.
+ */
+static inline bool range_in_vma(struct vm_area_struct *vma,
+                               unsigned long start, unsigned long end)
+{
+       if (!vma)
+               return false;
+       if (start < vma->vm_start)
+               return false;
+       return end <= vma->vm_end;
+}
+
  #ifdef CONFIG_MMU
  pgprot_t vm_get_page_prot(unsigned long vm_flags);
  void vma_set_page_prot(struct vm_area_struct *vma);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c

index c972acfe94616b3f64bceaab0acb3cb0ef477fa4..baeed4157a6b7cc28e021e6c09822c213f3f1e31 100644 (file)
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4394,12 +4394,40 @@ static int vma_shareable(struct vm_area_struct *vma, unsigned long addr)
         /*
          * check on proper vm_flags and page table alignment
          */
-       if (vma->vm_flags & VM_MAYSHARE &&
-           vma->vm_start <= base && end <= vma->vm_end)
+       if (vma->vm_flags & VM_MAYSHARE && range_in_vma(vma, base, end))
                 return 1;
         return 0;
  }
  
+/*
+ * Widen the range *start..*end so that it covers every PUD_SIZE aligned
+ * region which overlaps the range and could be mapped by a shared pmd.
+ *
+ * @vma:   vma the range belongs to
+ * @start: in/out, start of the range; it is only ever lowered
+ * @end:   in/out, end of the range; it is only ever raised
+ *
+ * A region is a sharing candidate when the vma has VM_MAYSHARE set and the
+ * whole PUD_SIZE aligned region lies inside the vma (range_in_vma()).  If
+ * no region qualifies the range is left untouched, so a caller can detect
+ * possible sharing by checking whether the range grew.
+ */
+void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
+                               unsigned long *start, unsigned long *end)
+{
+       unsigned long range_end = *end;
+       unsigned long a_start, a_end;
+
+       if (!(vma->vm_flags & VM_MAYSHARE))
+               return;
+
+       /*
+        * Walk PUD aligned regions against the original end.  Stepping an
+        * unaligned address by PUD_SIZE would skip the last region of a
+        * range that straddles a PUD boundary, e.g. (1G - 2M, 1G + 2M).
+        */
+       for (a_start = *start & PUD_MASK; a_start < range_end;
+            a_start = a_end) {
+               a_end = a_start + PUD_SIZE;
+
+               /* Stop if the region end wrapped around the address space */
+               if (a_end < a_start)
+                       break;
+
+               /*
+                * If sharing is possible, adjust start/end if necessary
+                */
+               if (range_in_vma(vma, a_start, a_end)) {
+                       if (a_start < *start)
+                               *start = a_start;
+                       if (a_end > *end)
+                               *end = a_end;
+               }
+       }
+}
+
  /*
   * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
   * and returns the corresponding pte. While this is not necessary for the
@@ -4492,6 +4520,11 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
  {
         return NULL;
  }
+
+/*
+ * Variant with an empty body: the range is left unchanged.
+ * NOTE(review): sits just before the #endif of the
+ * CONFIG_ARCH_WANT_HUGE_PMD_SHARE conditional, so presumably the fallback
+ * used when pmd sharing is not configured - confirm.
+ */
+void
+adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
+                                    unsigned long *start, unsigned long *end)
+{
+}
  #define want_pmd_share()       (0)
  #endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
  
diff --git a/mm/rmap.c b/mm/rmap.c

index 80b34078ff6198fe357a22b151b0f7b2c6e8c3d1..ea268b4f5585d5dbf440b6d1e132cfaaf871c669 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1191,8 +1191,37 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
         pte_t pteval;
         spinlock_t *ptl;
         int ret = SWAP_AGAIN;
+       unsigned long sh_address;
+       bool pmd_sharing_possible = false;
+       unsigned long spmd_start, spmd_end;
         enum ttu_flags flags = (enum ttu_flags)arg;
  
+       /*
+        * Only use the range_start/end mmu notifiers if huge pmd sharing
+        * is possible.  In the normal case, mmu_notifier_invalidate_page
+        * is sufficient as we only unmap a page.  However, if we unshare
+        * a pmd, we will unmap a PUD_SIZE range.
+        */
+       if (PageHuge(page)) {
+               spmd_start = address;
+               spmd_end = spmd_start + vma_mmu_pagesize(vma);
+
+               /*
+                * Check if pmd sharing is possible.  If possible, we could
+                * unmap a PUD_SIZE range.  spmd_start/spmd_end will be
+                * modified if sharing is possible.
+                */
+               adjust_range_if_pmd_sharing_possible(vma, &spmd_start,
+                                                               &spmd_end);
+               if (spmd_end - spmd_start != vma_mmu_pagesize(vma)) {
+                       sh_address = address;
+
+                       pmd_sharing_possible = true;
+                       mmu_notifier_invalidate_range_start(vma->vm_mm,
+                                                       spmd_start, spmd_end);
+               }
+       }
+
         pte = page_check_address(page, mm, address, &ptl, 0);
         if (!pte)
                 goto out;
@@ -1216,6 +1245,30 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                 }
         }
  
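+       /*
+        * Call huge_pmd_unshare to potentially unshare a huge pmd.  Pass
+        * sh_address as it will be modified if unsharing is successful.
+        *
+        * NOTE(review): sh_address is only assigned when pmd sharing was
+        * found possible above, yet it is passed here for every huge page;
+        * confirm huge_pmd_unshare cannot read it uninitialized (or
+        * initialize it to address at its declaration).
+        */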
+       if (PageHuge(page) && huge_pmd_unshare(mm, &sh_address, pte)) {
+               /*
+                * huge_pmd_unshare unmapped an entire PMD page.  There is
+                * no way of knowing exactly which PMDs may be cached for
+                * this mm, so flush them all.  spmd_start/spmd_end cover
+                * this PUD_SIZE range.
+                */
+               flush_cache_range(vma, spmd_start, spmd_end);
+               flush_tlb_range(vma, spmd_start, spmd_end);
+
+               /*
+                * The ref count of the PMD page was dropped which is part
+                * of the way map counting is done for shared PMDs.  When
+                * there is no other sharing, huge_pmd_unshare returns false
+                * and we will unmap the actual page and drop map count
+                * to zero.
+                */
+               goto out_unmap;
+       }
+
         /* Nuke the page table entry. */
         flush_cache_page(vma, address, page_to_pfn(page));
         pteval = ptep_clear_flush(vma, address, pte);
@@ -1301,6 +1354,9 @@ out_unmap:
         if (ret != SWAP_FAIL && !(flags & TTU_MUNLOCK))
                 mmu_notifier_invalidate_page(mm, address);
  out:
+       if (pmd_sharing_possible)
+               mmu_notifier_invalidate_range_end(vma->vm_mm,
+                                                       spmd_start, spmd_end);
         return ret;
  
  out_mlock:
author	Mike Kravetz <mike.kravetz@oracle.com>
	Wed, 21 Nov 2018 01:20:31 +0000 (17:20 -0800)
committer	Brian Maly <brian.maly@oracle.com>
	Mon, 24 Dec 2018 02:16:13 +0000 (21:16 -0500)
include/linux/hugetlb.h		patch \| blob \| history
include/linux/mm.h		patch \| blob \| history
mm/hugetlb.c		patch \| blob \| history
mm/rmap.c		patch \| blob \| history