return 0;
}
+
+static int hugetlb_mfill_pte_continue(struct vm_area_struct *dst_vma,
+ unsigned long dst_addr, uffd_flags_t flags,
+ unsigned long increment)
+{
+ struct mm_struct *dst_mm = dst_vma->vm_mm;
+ bool wp_enabled = (flags & MFILL_ATOMIC_WP);
+ struct hstate *h = hstate_vma(dst_vma);
+ struct address_space *mapping = dst_vma->vm_file->f_mapping;
+ pgoff_t idx = vma_hugecache_offset(h, dst_vma, dst_addr);
+ unsigned long size = huge_page_size(h);
+ int vm_shared = dst_vma->vm_flags & VM_SHARED;
+ pte_t _dst_pte;
+ pte_t *dst_pte;
+ spinlock_t *ptl;
+ int ret;
+ struct folio *folio;
+ u32 hash;
+
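+ /*
+ * hugetlb_mfill_prepare() takes the hugetlb fault mutex and the VMA
+ * read lock and looks up dst_pte; both locks are dropped on the out:
+ * path below.
+ */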
+ ret = hugetlb_mfill_prepare(dst_vma, dst_addr, increment,
+ &dst_pte, &hash, flags);
+ if (ret)
+ return ret;
+
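+ /*
+ * UFFDIO_CONTINUE resolves a minor fault: the page must already be
+ * present in the page cache, so just look it up and lock it.
+ */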
+ ret = -EFAULT;
+ folio = filemap_lock_hugetlb_folio(h, mapping, idx);
+ if (IS_ERR(folio))
+ goto out;
+
+ /*
+ * The page must already be uptodate for UFFDIO_CONTINUE: the ioctl
+ * path already includes an earlier smp_wmb() to ensure that prior
+ * stores to the page are visible before the set_pte_at() write
+ * below.
+ */
+ WARN_ON_ONCE(!folio_test_uptodate(folio));
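+
+ /* Serialize against concurrent faults before checking the old pte. */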
+ ptl = huge_pte_lock(h, dst_mm, dst_pte);
+ ret = -EIO;
+ if (folio_test_hwpoison(folio))
+ goto out_release_unlock;
+
+ /*
+ * We allow overwriting a pte marker: consider a range registered
+ * with both MISSING|WP, where we first wr-protect a none pte that
+ * has no page cache page backing it, and then access the page.
+ */
+ ret = -EEXIST;
+ if (!huge_pte_none_mostly(huge_ptep_get(dst_mm, dst_addr, dst_pte)))
+ goto out_release_unlock;
+
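+ /* The folio always comes from the page cache here; take a file rmap. */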
+ hugetlb_add_file_rmap(folio);
+
+ /*
+ * For either: (1) CONTINUE on a non-shared VMA, or (2) CONTINUE
+ * with the wp flag set, don't set the pte write bit.
+ */
+ _dst_pte = make_huge_pte(dst_vma, folio, !wp_enabled && vm_shared);
+ /*
+ * Always mark the UFFDIO_CONTINUE page dirty; note that this may not
+ * be extremely important for hugetlbfs for now, since swapping is
+ * not supported, but we should still make clear that this page
+ * cannot be thrown away at will, even if the write bit is not set.
+ */
+ _dst_pte = huge_pte_mkdirty(_dst_pte);
+ _dst_pte = pte_mkyoung(_dst_pte);
+
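+ /* Respect UFFDIO_CONTINUE_MODE_WP: install the pte uffd-wp protected. */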
+ if (wp_enabled)
+ _dst_pte = huge_pte_mkuffd_wp(_dst_pte);
+
+ set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte, size);
+
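+ /* Account the new mapping in the mm's hugetlb RSS counter. */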
+ hugetlb_count_add(pages_per_huge_page(h), dst_mm);
+
+ /* No need to invalidate - it was non-present before */
+ update_mmu_cache(dst_vma, dst_addr, dst_pte);
+
+ spin_unlock(ptl);
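+ /*
+ * filemap_lock_hugetlb_folio() returned the folio locked; unlock it
+ * regardless of whether the VMA is shared.
+ */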
+ folio_unlock(folio);
+ ret = 0;
+out:
+ hugetlb_vma_unlock_read(dst_vma);
+ mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+ return ret;
+out_release_unlock:
+ spin_unlock(ptl);
+ folio_unlock(folio);
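+ /* Drop the page cache reference taken by filemap_lock_hugetlb_folio(). */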
+ folio_put(folio);
+ goto out;
+}
+
/*
* Used by userfaultfd UFFDIO_* ioctls. Based on userfaultfd's mfill_atomic_pte
* with modifications for hugetlb pages.
bool folio_in_pagecache = false;
u32 hash;
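+
+ /* CONTINUE never allocates a page; it maps the existing page cache page. */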
+ if (is_continue)
+ return hugetlb_mfill_pte_continue(dst_vma, dst_addr, flags,
+ increment);
+
ret = hugetlb_mfill_prepare(dst_vma, dst_addr, increment,
&dst_pte, &hash, flags);
if (ret)
return ret;
- if (is_continue) {
- ret = -EFAULT;
- folio = filemap_lock_hugetlb_folio(h, mapping, idx);
- if (IS_ERR(folio))
- goto out;
- folio_in_pagecache = true;
- } else if (!*foliop) {
+ if (!*foliop) {
/* If a folio already exists, then it's UFFDIO_COPY for
* a non-missing case. Return -EEXIST.
*/
* an earlier smp_wmb() to ensure that prior stores will be visible
* before the set_pte_at() write.
*/
- if (!is_continue)
- __folio_mark_uptodate(folio);
- else
- WARN_ON_ONCE(!folio_test_uptodate(folio));
+ __folio_mark_uptodate(folio);
/* Add shared, newly allocated pages to the page cache. */
- if (vm_shared && !is_continue) {
+ if (vm_shared) {
ret = -EFAULT;
if (idx >= (i_size_read(mapping->host) >> huge_page_shift(h)))
goto out_release_nounlock;
- * For either: (1) CONTINUE on a non-shared VMA, or (2) UFFDIO_COPY
- * with wp flag set, don't set pte write bit.
+ * Don't set the pte write bit for UFFDIO_COPY with the wp flag set.
*/
- _dst_pte = make_huge_pte(dst_vma, folio,
- !wp_enabled && !(is_continue && !vm_shared));
+ _dst_pte = make_huge_pte(dst_vma, folio, !wp_enabled);
/*
* Always mark UFFDIO_COPY page dirty; note that this may not be
* extremely important for hugetlbfs for now since swapping is not
update_mmu_cache(dst_vma, dst_addr, dst_pte);
spin_unlock(ptl);
- if (!is_continue)
- folio_set_hugetlb_migratable(folio);
- if (vm_shared || is_continue)
+ folio_set_hugetlb_migratable(folio);
+ if (vm_shared)
folio_unlock(folio);
ret = 0;
out:
return ret;
out_release_unlock:
spin_unlock(ptl);
- if (vm_shared || is_continue)
+ if (vm_shared)
folio_unlock(folio);
out_release_nounlock:
if (!folio_in_pagecache)