fs/dax: don't skip locked entries when scanning entries

author Alistair Popple <apopple@nvidia.com>

Tue, 18 Feb 2025 03:55:19 +0000 (14:55 +1100)

committer Andrew Morton <akpm@linux-foundation.org>

Fri, 28 Feb 2025 01:00:11 +0000 (17:00 -0800)
author Alistair Popple <apopple@nvidia.com>
Tue, 18 Feb 2025 03:55:19 +0000 (14:55 +1100)
committer Andrew Morton <akpm@linux-foundation.org>
Fri, 28 Feb 2025 01:00:11 +0000 (17:00 -0800)
diff --git a/fs/dax.c b/fs/dax.c

index b35f538c4330be98e4d4a132c2fb2fe7fe08125b..f5fdb43f5de332d98a5534f7d65722b066f13ed8 100644 (file)
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -206,7 +206,7 @@ static void dax_wake_entry(struct xa_state *xas, void *entry,
   *
   * Must be called with the i_pages lock held.
   */
-static void *get_unlocked_entry(struct xa_state *xas, unsigned int order)
+static void *get_next_unlocked_entry(struct xa_state *xas, unsigned int order)
  {
         void *entry;
         struct wait_exceptional_entry_queue ewait;
@@ -235,6 +235,37 @@ static void *get_unlocked_entry(struct xa_state *xas, unsigned int order)
         }
  }
  
+/*
+ * Wait for @entry to become unlocked, dropping the i_pages lock (which the
+ * caller must hold) while asleep. Caller then calls put_unlocked_entry(), or
+ * dax_unlock_entry() if it locked the entry. Returns the entry, else NULL.
+ */
+static void *wait_entry_unlocked_exclusive(struct xa_state *xas, void *entry)
+{
+       struct wait_exceptional_entry_queue ewait;
+       wait_queue_head_t *wq;
+
+       init_wait(&ewait.wait);
+       ewait.wait.func = wake_exceptional_entry_func;
+
+       while (unlikely(dax_is_locked(entry))) {
+               wq = dax_entry_waitqueue(xas, entry, &ewait.key);
+               prepare_to_wait_exclusive(wq, &ewait.wait,
+                                       TASK_UNINTERRUPTIBLE);
+               xas_reset(xas); /* not xas_pause(): reload same index */
+               xas_unlock_irq(xas);
+               schedule();
+               finish_wait(wq, &ewait.wait);
+               xas_lock_irq(xas);
+               entry = xas_load(xas);
+       }
+
+       if (xa_is_internal(entry))
+               return NULL;
+
+       return entry;
+}
+
  /*
   * The only thing keeping the address space around is the i_pages lock
   * (it's cycled in clear_inode() after removing the entries from i_pages)
@@ -250,7 +281,7 @@ static void wait_entry_unlocked(struct xa_state *xas, void *entry)
  
         wq = dax_entry_waitqueue(xas, entry, &ewait.key);
         /*
-        * Unlike get_unlocked_entry() there is no guarantee that this
+        * Unlike get_next_unlocked_entry() there is no guarantee that this
          * path ever successfully retrieves an unlocked entry before an
          * inode dies. Perform a non-exclusive wait in case this path
          * never successfully performs its own wake up.
@@ -581,7 +612,7 @@ static void *grab_mapping_entry(struct xa_state *xas,
  retry:
         pmd_downgrade = false;
         xas_lock_irq(xas);
-       entry = get_unlocked_entry(xas, order);
+       entry = get_next_unlocked_entry(xas, order);
  
         if (entry) {
                 if (dax_is_conflict(entry))
@@ -717,8 +748,7 @@ struct page *dax_layout_busy_page_range(struct address_space *mapping,
         xas_for_each(&xas, entry, end_idx) {
                 if (WARN_ON_ONCE(!xa_is_value(entry)))
                         continue;
-               if (unlikely(dax_is_locked(entry)))
-                       entry = get_unlocked_entry(&xas, 0);
+               entry = wait_entry_unlocked_exclusive(&xas, entry);
                 if (entry)
                         page = dax_busy_page(entry);
                 put_unlocked_entry(&xas, entry, WAKE_NEXT);
@@ -751,7 +781,7 @@ static int __dax_invalidate_entry(struct address_space *mapping,
         void *entry;
  
         xas_lock_irq(&xas);
-       entry = get_unlocked_entry(&xas, 0);
+       entry = get_next_unlocked_entry(&xas, 0);
         if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
                 goto out;
         if (!trunc &&
@@ -777,7 +807,9 @@ static int __dax_clear_dirty_range(struct address_space *mapping,
  
         xas_lock_irq(&xas);
         xas_for_each(&xas, entry, end) {
-               entry = get_unlocked_entry(&xas, 0);
+               entry = wait_entry_unlocked_exclusive(&xas, entry);
+               if (!entry)
+                       continue;
                 xas_clear_mark(&xas, PAGECACHE_TAG_DIRTY);
                 xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE);
                 put_unlocked_entry(&xas, entry, WAKE_NEXT);
@@ -941,7 +973,7 @@ static int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev,
         if (unlikely(dax_is_locked(entry))) {
                 void *old_entry = entry;
  
-               entry = get_unlocked_entry(xas, 0);
+               entry = get_next_unlocked_entry(xas, 0);
  
                 /* Entry got punched out / reallocated? */
                 if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
@@ -1950,7 +1982,7 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
         vm_fault_t ret;
  
         xas_lock_irq(&xas);
-       entry = get_unlocked_entry(&xas, order);
+       entry = get_next_unlocked_entry(&xas, order);
         /* Did we race with someone splitting entry or so? */
         if (!entry || dax_is_conflict(entry) ||
             (order == 0 && !dax_is_pte_entry(entry))) {
author	Alistair Popple <apopple@nvidia.com>
	Tue, 18 Feb 2025 03:55:19 +0000 (14:55 +1100)
committer	Andrew Morton <akpm@linux-foundation.org>
	Fri, 28 Feb 2025 01:00:11 +0000 (17:00 -0800)