        return ERR_PTR(error);
}
+static struct folio *shmem_swap_alloc_folio(struct inode *inode,
+                struct vm_area_struct *vma, pgoff_t index,
+                swp_entry_t entry, int order, gfp_t gfp)
+{
+        struct shmem_inode_info *info = SHMEM_I(inode);
+        struct folio *new;
+        void *shadow;
+        int nr_pages;
+
+        /*
+         * We have arrived here because our zones are constrained, so don't
+         * limit chance of success with further cpuset and node constraints.
+         */
+        gfp &= ~GFP_CONSTRAINT_MASK;
+        if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && order > 0) {
+                gfp_t huge_gfp = vma_thp_gfp_mask(vma);
+
+                gfp = limit_gfp_mask(huge_gfp, gfp);
+        }
+
+        new = shmem_alloc_folio(gfp, order, info, index);
+        if (!new)
+                return ERR_PTR(-ENOMEM);
+
+        nr_pages = folio_nr_pages(new);
+        if (mem_cgroup_swapin_charge_folio(new, vma ? vma->vm_mm : NULL,
+                                           gfp, entry)) {
+                folio_put(new);
+                return ERR_PTR(-ENOMEM);
+        }
+
+        /*
+         * Prevent parallel swapin from proceeding with the swap cache flag.
+         *
+         * There is one more concurrent scenario to cover: the swap cache
+         * flag of a large folio may already have been set by
+         * swapcache_prepare() while another thread has already split the
+         * large swap entry stored in the shmem mapping. In that case,
+         * shmem_add_to_page_cache() will identify the concurrent swapin
+         * and return -EEXIST.
+         */
+        if (swapcache_prepare(entry, nr_pages)) {
+                folio_put(new);
+                return ERR_PTR(-EEXIST);
+        }
+
+        __folio_set_locked(new);
+        __folio_set_swapbacked(new);
+        new->swap = entry;
+
+        mem_cgroup_swapin_uncharge_swap(entry, nr_pages);
+        shadow = get_shadow_from_swap_cache(entry);
+        if (shadow)
+                workingset_refault(new, shadow);
+        folio_add_lru(new);
+        swap_read_folio(new, NULL);
+        return new;
+}
+
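
The swapcache_prepare() call above effectively claims the swap entry: the first
swapin of a given entry wins and goes on to do the read, while any racing
swapin sees the existing claim and backs out with -EEXIST so its caller can
retry the lookup. As a rough, self-contained sketch of that claim-before-I/O
idea (not the kernel's implementation; NSLOTS, claim_for_swapin() and
release_claim() are invented for the illustration), a per-slot atomic flag
gives the same first-caller-wins behaviour:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define NSLOTS 64

/* One claim flag per swap slot; zero-initialized, i.e. unclaimed. */
static atomic_bool slot_claimed[NSLOTS];

/* Returns true if we won the claim and may perform the single read. */
static bool claim_for_swapin(unsigned int slot)
{
        bool expected = false;

        /* Plays the role of swapcache_prepare() succeeding. */
        return atomic_compare_exchange_strong(&slot_claimed[slot],
                                              &expected, true);
}

/* Plays the role of swapcache_clear() on the raced/error paths. */
static void release_claim(unsigned int slot)
{
        atomic_store(&slot_claimed[slot], false);
}

int main(void)
{
        if (claim_for_swapin(7)) {
                printf("won the claim: read the data, add it to the cache\n");
                release_claim(7);
        } else {
                printf("lost the race: return -EEXIST and retry the lookup\n");
        }
        return 0;
}

In the patch the claim is held until the folio is installed in the mapping or
an error path runs swapcache_clear(), which is exactly what the skip_swapcache
handling further down in shmem_swapin_folio() arranges.
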
/*
* When a page is moved from swapcache to shmem filecache (either by the
* usual swapin of shmem_get_folio_gfp(), or by the less common swapoff of
}
static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index,
-                                         struct folio *folio, swp_entry_t swap)
+                                         struct folio *folio, swp_entry_t swap,
+                                         bool skip_swapcache)
{
        struct address_space *mapping = inode->i_mapping;
        swp_entry_t swapin_error;
        nr_pages = folio_nr_pages(folio);
        folio_wait_writeback(folio);
-        delete_from_swap_cache(folio);
+        if (!skip_swapcache)
+                delete_from_swap_cache(folio);
        /*
         * Don't treat swapin error folio as alloced. Otherwise inode->i_blocks
         * won't be 0 when inode is released and thus trigger WARN_ON(i_blocks)
        struct shmem_inode_info *info = SHMEM_I(inode);
        struct swap_info_struct *si;
        struct folio *folio = NULL;
+        bool skip_swapcache = false;
        swp_entry_t swap;
        int error, nr_pages;
        /* Look it up and read it in.. */
        folio = swap_cache_get_folio(swap, NULL, 0);
        if (!folio) {
+                int order = xa_get_order(&mapping->i_pages, index);
+                bool fallback_order0 = false;
                int split_order;
                /* Or update major stats only when swapin succeeds?? */
                        count_memcg_event_mm(fault_mm, PGMAJFAULT);
                }
+                /*
+                 * If uffd is active for the vma, we need per-page fault
+                 * fidelity to maintain the uffd semantics, so fall back to
+                 * swapping in an order-0 folio; do the same for the zswap
+                 * case.
+                 */
+                if (order > 0 && ((vma && unlikely(userfaultfd_armed(vma))) ||
+                                  !zswap_never_enabled()))
+                        fallback_order0 = true;
+
+                /* Skip swapcache for synchronous device. */
+                if (!fallback_order0 && data_race(si->flags & SWP_SYNCHRONOUS_IO)) {
+                        folio = shmem_swap_alloc_folio(inode, vma, index, swap, order, gfp);
+                        if (!IS_ERR(folio)) {
+                                skip_swapcache = true;
+                                goto alloced;
+                        }
+
+                        /*
+                         * Fall back to order-0 swapin unless the swap entry
+                         * already exists.
+                         */
+                        error = PTR_ERR(folio);
+                        folio = NULL;
+                        if (error == -EEXIST)
+                                goto failed;
+                }
+
                /*
                 * Now swap device can only swap in order 0 folio, then we
                 * should split the large swap entry stored in the pagecache
                }
        }
+alloced:
        /* We have to do this with folio locked to prevent races */
        folio_lock(folio);
-        if (!folio_test_swapcache(folio) ||
+        if ((!skip_swapcache && !folio_test_swapcache(folio)) ||
            folio->swap.val != swap.val ||
            !shmem_confirm_swap(mapping, index, swap)) {
                error = -EEXIST;
        if (sgp == SGP_WRITE)
                folio_mark_accessed(folio);
-        delete_from_swap_cache(folio);
+        if (skip_swapcache) {
+                folio->swap.val = 0;
+                swapcache_clear(si, swap, nr_pages);
+        } else {
+                delete_from_swap_cache(folio);
+        }
        folio_mark_dirty(folio);
        swap_free_nr(swap, nr_pages);
        put_swap_device(si);
        if (!shmem_confirm_swap(mapping, index, swap))
                error = -EEXIST;
        if (error == -EIO)
-                shmem_set_folio_swapin_error(inode, index, folio, swap);
+                shmem_set_folio_swapin_error(inode, index, folio, swap,
+                                             skip_swapcache);
unlock:
+        if (skip_swapcache)
+                swapcache_clear(si, swap, folio_nr_pages(folio));
        if (folio) {
                folio_unlock(folio);
                folio_put(folio);
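
A detail worth spelling out in the hunks above is how the skip-swapcache branch
tells its two failure modes apart: shmem_swap_alloc_folio() never returns NULL,
it returns either a folio or an errno encoded with ERR_PTR(), so -EEXIST
(another swapin already holds the claim) can be handled differently from
-ENOMEM (fall back to the order-0 path). A minimal userspace sketch of that
error-pointer convention (simplified from the kernel's include/linux/err.h;
alloc_or_err() is a made-up stand-in for the helper) looks like this:

#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO 4095

/* Encode a small negative errno in an otherwise-invalid pointer value. */
static inline void *ERR_PTR(long error)
{
        return (void *)error;
}

static inline long PTR_ERR(const void *ptr)
{
        return (long)ptr;
}

static inline int IS_ERR(const void *ptr)
{
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

/* Made-up stand-in for shmem_swap_alloc_folio()'s two failure modes. */
static void *alloc_or_err(int raced)
{
        return raced ? ERR_PTR(-EEXIST) : ERR_PTR(-ENOMEM);
}

int main(void)
{
        void *folio = alloc_or_err(1);

        if (IS_ERR(folio)) {
                long err = PTR_ERR(folio);

                if (err == -EEXIST)
                        printf("raced with another swapin: bail out\n");
                else
                        printf("allocation failed: fall back to order-0\n");
        }
        return 0;
}

This is why the branch only jumps to the failed label on -EEXIST and otherwise
resets folio to NULL and falls through to the existing order-0 swapin code.
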