long min_hpages);
void hugepage_put_subpool(struct hugepage_subpool *spool);
-void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
+void hugetlb_dup_vma_private(struct vm_area_struct *vma);
void clear_vma_resv_huge_pages(struct vm_area_struct *vma);
int hugetlb_sysctl_handler(struct ctl_table *, int, void *, size_t *, loff_t *);
int hugetlb_overcommit_handler(struct ctl_table *, int, void *, size_t *,
struct page *follow_huge_pgd(struct mm_struct *mm, unsigned long address,
pgd_t *pgd, int flags);
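+
+/*
+ * Wrappers around the per-vma rw_semaphore used to synchronize pmd
+ * sharing.  For vmas without an allocated lock the lock/unlock calls are
+ * no-ops and trylock reports success.
+ */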
+void hugetlb_vma_lock_read(struct vm_area_struct *vma);
+void hugetlb_vma_unlock_read(struct vm_area_struct *vma);
+void hugetlb_vma_lock_write(struct vm_area_struct *vma);
+void hugetlb_vma_unlock_write(struct vm_area_struct *vma);
+int hugetlb_vma_trylock_write(struct vm_area_struct *vma);
+void hugetlb_vma_assert_locked(struct vm_area_struct *vma);
+
int pmd_huge(pmd_t pmd);
int pud_huge(pud_t pud);
unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
#else /* !CONFIG_HUGETLB_PAGE */
-static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
+static inline void hugetlb_dup_vma_private(struct vm_area_struct *vma)
{
}
return -EINVAL;
}
+static inline void hugetlb_vma_lock_read(struct vm_area_struct *vma)
+{
+}
+
+static inline void hugetlb_vma_unlock_read(struct vm_area_struct *vma)
+{
+}
+
+static inline void hugetlb_vma_lock_write(struct vm_area_struct *vma)
+{
+}
+
+static inline void hugetlb_vma_unlock_write(struct vm_area_struct *vma)
+{
+}
+
+static inline int hugetlb_vma_trylock_write(struct vm_area_struct *vma)
+{
+ return 1;
+}
+
+static inline void hugetlb_vma_assert_locked(struct vm_area_struct *vma)
+{
+}
+
static inline int pmd_huge(pmd_t pmd)
{
return 0;
/* Forward declaration */
static int hugetlb_acct_memory(struct hstate *h, long delta);
+static void hugetlb_vma_lock_free(struct vm_area_struct *vma);
+static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma);
static inline bool subpool_is_free(struct hugepage_subpool *spool)
{
return (get_vma_private_data(vma) & flag) != 0;
}
-/* Reset counters to 0 and clear all HPAGE_RESV_* flags */
-void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
+void hugetlb_dup_vma_private(struct vm_area_struct *vma)
{
VM_BUG_ON_VMA(!is_vm_hugetlb_page(vma), vma);
+ /*
+ * Clear vm_private_data
+ * - For MAP_PRIVATE mappings, this is the reserve map which does
+ * not apply to children. Faults generated by the children are
+ * not guaranteed to succeed, even if read-only.
+ * - For shared mappings this is a per-vma semaphore that may be
+ * allocated below.
+ */
+ vma->vm_private_data = (void *)0;
if (!(vma->vm_flags & VM_MAYSHARE))
- vma->vm_private_data = (void *)0;
+ return;
+
+ /*
+ * Allocate semaphore if pmd sharing is possible.
+ */
+ hugetlb_vma_lock_alloc(vma);
}
/*
kref_put(&reservations->refs, resv_map_release);
}
- reset_vma_resv_huge_pages(vma);
+ hugetlb_dup_vma_private(vma);
}
/* Returns true if the VMA has associated reserve pages */
resv_map_dup_hugetlb_cgroup_uncharge_info(resv);
kref_get(&resv->refs);
}
+
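+ /* Also allocate the per-vma pmd sharing lock for the new vma */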
+ hugetlb_vma_lock_alloc(vma);
}
static void hugetlb_vm_op_close(struct vm_area_struct *vma)
{
struct hstate *h = hstate_vma(vma);
- struct resv_map *resv = vma_resv_map(vma);
+ struct resv_map *resv;
struct hugepage_subpool *spool = subpool_vma(vma);
unsigned long reserve, start, end;
long gbl_reserve;
+ hugetlb_vma_lock_free(vma);
+
+ resv = vma_resv_map(vma);
if (!resv || !is_vma_resv_set(vma, HPAGE_RESV_OWNER))
return;
return false;
}
+ /*
+ * vma-specific semaphore used for pmd sharing synchronization
+ */
+ hugetlb_vma_lock_alloc(vma);
+
/*
* Only apply hugepage reservation if asked. At fault time, an
* attempt will be made for VM_NORESERVE to allocate a page
resv_map = inode_resv_map(inode);
chg = region_chg(resv_map, from, to, &regions_needed);
-
} else {
/* Private mapping. */
resv_map = resv_map_alloc();
if (!resv_map)
- return false;
+ goto out_err;
chg = to - from;
hugetlb_cgroup_uncharge_cgroup_rsvd(hstate_index(h),
chg * pages_per_huge_page(h), h_cg);
out_err:
+ hugetlb_vma_lock_free(vma);
if (!vma || vma->vm_flags & VM_MAYSHARE)
/* Only call region_abort if the region_chg succeeded but the
* region_add failed or didn't run.
}
static bool __vma_aligned_range_pmd_shareable(struct vm_area_struct *vma,
- unsigned long start, unsigned long end)
+ unsigned long start, unsigned long end,
+ bool check_vma_lock)
{
+#ifdef CONFIG_USERFAULTFD
+ if (uffd_disable_huge_pmd_share(vma))
+ return false;
+#endif
/*
* check on proper vm_flags and page table alignment
*/
- if (vma->vm_flags & VM_MAYSHARE && range_in_vma(vma, start, end))
- return true;
- return false;
+ if (!(vma->vm_flags & VM_MAYSHARE))
+ return false;
+ if (check_vma_lock && !vma->vm_private_data)
+ return false;
+ if (!range_in_vma(vma, start, end))
+ return false;
+ return true;
+}
+
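+/*
+ * Return true if any PUD_SIZE aligned and sized range of the vma could
+ * possibly have its page tables pmd shared.  The per-vma lock need not
+ * exist yet; this is used when deciding whether to allocate it.
+ */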
+static bool vma_pmd_shareable(struct vm_area_struct *vma)
+{
+ unsigned long start = ALIGN(vma->vm_start, PUD_SIZE),
+ end = ALIGN_DOWN(vma->vm_end, PUD_SIZE);
+
+ if (start >= end)
+ return false;
+
+ return __vma_aligned_range_pmd_shareable(vma, start, end, false);
}
static bool vma_addr_pmd_shareable(struct vm_area_struct *vma,
unsigned long start = addr & PUD_MASK;
unsigned long end = start + PUD_SIZE;
- return __vma_aligned_range_pmd_shareable(vma, start, end);
+ return __vma_aligned_range_pmd_shareable(vma, start, end, true);
}
bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr)
{
-#ifdef CONFIG_USERFAULTFD
- if (uffd_disable_huge_pmd_share(vma))
- return false;
-#endif
return vma_addr_pmd_shareable(vma, addr);
}
*end = ALIGN(*end, PUD_SIZE);
}
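+
+/*
+ * The per-vma pmd sharing lock is an rw_semaphore hung off vm_private_data.
+ * It only exists for sharable (VM_MAYSHARE/VM_SHARED) mappings for which
+ * hugetlb_vma_lock_alloc() succeeded.
+ */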
+static bool __vma_shareable_flags_pmd(struct vm_area_struct *vma)
+{
+ return vma->vm_flags & (VM_MAYSHARE | VM_SHARED) &&
+ vma->vm_private_data;
+}
+
+void hugetlb_vma_lock_read(struct vm_area_struct *vma)
+{
+ if (__vma_shareable_flags_pmd(vma))
+ down_read((struct rw_semaphore *)vma->vm_private_data);
+}
+
+void hugetlb_vma_unlock_read(struct vm_area_struct *vma)
+{
+ if (__vma_shareable_flags_pmd(vma))
+ up_read((struct rw_semaphore *)vma->vm_private_data);
+}
+
+void hugetlb_vma_lock_write(struct vm_area_struct *vma)
+{
+ if (__vma_shareable_flags_pmd(vma))
+ down_write((struct rw_semaphore *)vma->vm_private_data);
+}
+
+void hugetlb_vma_unlock_write(struct vm_area_struct *vma)
+{
+ if (__vma_shareable_flags_pmd(vma))
+ up_write((struct rw_semaphore *)vma->vm_private_data);
+}
+
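+/*
+ * Returns nonzero when the write lock was taken.  A vma without a pmd
+ * sharing lock is treated as trivially locked, so 1 is returned.
+ */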
+int hugetlb_vma_trylock_write(struct vm_area_struct *vma)
+{
+ if (!__vma_shareable_flags_pmd(vma))
+ return 1;
+
+ return down_write_trylock((struct rw_semaphore *)vma->vm_private_data);
+}
+
+void hugetlb_vma_assert_locked(struct vm_area_struct *vma)
+{
+ if (__vma_shareable_flags_pmd(vma))
+ lockdep_assert_held((struct rw_semaphore *)
+ vma->vm_private_data);
+}
+
+static void hugetlb_vma_lock_free(struct vm_area_struct *vma)
+{
+ /*
+ * Only present in sharable vmas. See comment in
+ * __unmap_hugepage_range_final about the need to check both
+ * VM_SHARED and VM_MAYSHARE in the free path.
+ */
+ if (!vma || !(vma->vm_flags & (VM_MAYSHARE | VM_SHARED)))
+ return;
+
+ if (vma->vm_private_data) {
+ kfree(vma->vm_private_data);
+ vma->vm_private_data = NULL;
+ }
+}
+
+static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma)
+{
+ struct rw_semaphore *vma_sema;
+
+ /* Only establish in (VM_MAYSHARE) sharable vmas */
+ if (!vma || !(vma->vm_flags & VM_MAYSHARE))
+ return;
+
+ /* Should never get here with non-NULL vm_private_data */
+ if (vma->vm_private_data)
+ return;
+
+ /* Check whether size/alignment make pmd sharing possible */
+ if (!vma_pmd_shareable(vma))
+ return;
+
+ vma_sema = kmalloc(sizeof(*vma_sema), GFP_KERNEL);
+ if (!vma_sema)
+ /*
+ * If we cannot allocate the semaphore, then the vma cannot
+ * participate in pmd sharing.
+ */
+ return;
+
+ init_rwsem(vma_sema);
+ vma->vm_private_data = vma_sema;
+}
+
/*
* Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
* and returns the corresponding pte. While this is not necessary for the
}
#else /* !CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
+static void hugetlb_vma_lock_free(struct vm_area_struct *vma)
+{
+}
+
+static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma)
+{
+}
+
pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, pud_t *pud)
{