xfs: fix unmount hang with unflushable inodes stuck in the AIL

author Dave Chinner <dchinner@redhat.com>

Fri, 23 May 2025 04:47:07 +0000 (06:47 +0200)

committer Christoph Hellwig <hch@lst.de>

Fri, 23 May 2025 04:48:32 +0000 (06:48 +0200)
author Dave Chinner <dchinner@redhat.com>
Fri, 23 May 2025 04:47:07 +0000 (06:47 +0200)
committer Christoph Hellwig <hch@lst.de>
Fri, 23 May 2025 04:48:32 +0000 (06:48 +0200)
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c

index 6124b22c890ff8538b689e07658417bc7855e8f7..83bff1b04b469486fc9224fc371f0de2113869b0 100644 (file)
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -614,43 +614,42 @@ xfs_buf_item_push(
   * Drop the buffer log item refcount and take appropriate action. This helper
   * determines whether the bli must be freed or not, since a decrement to zero
   * does not necessarily mean the bli is unused.
- *
- * Return true if the bli is freed, false otherwise.
   */
-bool
+void
  xfs_buf_item_put(
         struct xfs_buf_log_item *bip)
  {
-       struct xfs_log_item     *lip = &bip->bli_item;
-       bool                    aborted;
-       bool                    dirty;
+
+       ASSERT(xfs_buf_islocked(bip->bli_buf));
  
         /* drop the bli ref and return if it wasn't the last one */
         if (!atomic_dec_and_test(&bip->bli_refcount))
-               return false;
+               return;
  
-       /*
-        * We dropped the last ref and must free the item if clean or aborted.
-        * If the bli is dirty and non-aborted, the buffer was clean in the
-        * transaction but still awaiting writeback from previous changes. In
-        * that case, the bli is freed on buffer writeback completion.
-        */
-       aborted = test_bit(XFS_LI_ABORTED, &lip->li_flags) ||
-                       xlog_is_shutdown(lip->li_log);
-       dirty = bip->bli_flags & XFS_BLI_DIRTY;
-       if (dirty && !aborted)
-               return false;
+       /* If the BLI is in the AIL, then it is still dirty and in use */
+       if (test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags)) {
+               ASSERT(bip->bli_flags & XFS_BLI_DIRTY);
+               return;
+       }
  
         /*
-        * The bli is aborted or clean. An aborted item may be in the AIL
-        * regardless of dirty state.  For example, consider an aborted
-        * transaction that invalidated a dirty bli and cleared the dirty
-        * state.
+        * In shutdown conditions, we can be asked to free a dirty BLI that
+        * isn't in the AIL. This can occur due to a checkpoint aborting a BLI
+        * instead of inserting it into the AIL at checkpoint IO completion. If
+        * there's another bli reference (e.g. a btree cursor holds a clean
+        * reference) and it is released via xfs_trans_brelse(), we can get here
+        * with that aborted, dirty BLI. In this case, it is safe to free the
+        * dirty BLI immediately, as it is not in the AIL and there are no
+        * other references to it.
+        *
+        * We should never get here with a stale BLI via that path as
+        * xfs_trans_brelse() specifically holds onto stale buffers rather than
+        * releasing them.
          */
-       if (aborted)
-               xfs_trans_ail_delete(lip, 0);
+       ASSERT(!(bip->bli_flags & XFS_BLI_DIRTY) ||
+                       test_bit(XFS_LI_ABORTED, &bip->bli_item.li_flags));
+       ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
         xfs_buf_item_relse(bip);
-       return true;
  }
  
  /*
@@ -671,6 +670,15 @@ xfs_buf_item_put(
   * if necessary but do not unlock the buffer.  This is for support of
   * xfs_trans_bhold(). Make sure the XFS_BLI_HOLD field is cleared if we don't
   * free the item.
+ *
+ * If the XFS_BLI_STALE flag is set, the last reference to the BLI *must*
+ * perform a completion abort of any objects attached to the buffer for IO
+ * tracking purposes. This generally only happens in shutdown situations,
+ * normally xfs_buf_item_unpin() will drop the last BLI reference and perform
+ * completion processing. However, because transaction completion can race with
+ * checkpoint completion during a shutdown, this release context may end up
+ * being the last active reference to the BLI and so needs to perform this
+ * cleanup.
   */
  STATIC void
  xfs_buf_item_release(
@@ -678,18 +686,19 @@ xfs_buf_item_release(
  {
         struct xfs_buf_log_item *bip = BUF_ITEM(lip);
         struct xfs_buf          *bp = bip->bli_buf;
-       bool                    released;
         bool                    hold = bip->bli_flags & XFS_BLI_HOLD;
         bool                    stale = bip->bli_flags & XFS_BLI_STALE;
-#if defined(DEBUG) || defined(XFS_WARN)
-       bool                    ordered = bip->bli_flags & XFS_BLI_ORDERED;
-       bool                    dirty = bip->bli_flags & XFS_BLI_DIRTY;
         bool                    aborted = test_bit(XFS_LI_ABORTED,
                                                    &lip->li_flags);
+       bool                    dirty = bip->bli_flags & XFS_BLI_DIRTY;
+#if defined(DEBUG) || defined(XFS_WARN)
+       bool                    ordered = bip->bli_flags & XFS_BLI_ORDERED;
  #endif
  
         trace_xfs_buf_item_release(bip);
  
+       ASSERT(xfs_buf_islocked(bp));
+
         /*
          * The bli dirty state should match whether the blf has logged segments
          * except for ordered buffers, where only the bli should be dirty.
@@ -706,15 +715,54 @@ xfs_buf_item_release(
         bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD | XFS_BLI_ORDERED);
  
         /*
-        * Unref the item and unlock the buffer unless held or stale. Stale
-        * buffers remain locked until final unpin unless the bli is freed by
-        * the unref call. The latter implies shutdown because buffer
-        * invalidation dirties the bli and transaction.
+        * If this is the last reference to the BLI, we must clean it up
+        * before freeing it and releasing the buffer attached to it.
+        */
+       if (atomic_dec_and_test(&bip->bli_refcount)) {
+               if (stale) {
+                       /*
+                        * Stale buffer completion frees the BLI, unlocks and
+                        * releases the buffer. Neither the BLI or buffer are
+                        * safe to reference after this call, so there's nothing
+                        * more we need to do here.
+                        */
+                       xfs_buf_item_finish_stale(bp);
+                       return;
+               } else if (aborted || xlog_is_shutdown(lip->li_log)) {
+                       /*
+                        * Dirty or clean, aborted items are done and need to be
+                        * removed from the AIL and released. This frees the
+                        * BLI, but leaves the buffer locked and referenced.
+                        */
+                       ASSERT(list_empty(&bip->bli_buf->b_li_list));
+                       xfs_buf_item_done(bp);
+               } else if (!dirty) {
+                       /*
+                        * Clean, unreferenced BLIs can be immediately freed,
+                        * leaving the buffer locked and referenced.
+                        */
+                       xfs_buf_item_relse(bip);
+               } else {
+                       /*
+                        * Dirty, unreferenced BLIs *must* be in the AIL
+                        * awaiting writeback.
+                        */
+                       ASSERT(test_bit(XFS_LI_IN_AIL, &lip->li_flags));
+               }
+       }
+
+       /* Not safe to reference the BLI from here */
+
+       /*
+        * If we get here with a stale buffer, there are references to the BLI
+        * still remaining. We mus tnot unlock the buffer in this case as we've
+        * handed the lock context to the last BLI reference to process.
+        *
+        * Hence if the buffer is stale or the transaction owner wants the
+        * buffer to remain locked across the commit call, do not release it.
          */
-       released = xfs_buf_item_put(bip);
-       if (hold || (stale && !released))
+       if (hold || stale)
                 return;
-       ASSERT(!stale || aborted);
         xfs_buf_relse(bp);
  }
  
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h

index 50dd79b59cf5a3d2f4984c15df73d64542dd0bc4..66f65eaba8c3e7041b967edfa958b33389901ecb 100644 (file)
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -49,7 +49,7 @@ struct xfs_buf_log_item {
  
  int    xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *);
  void   xfs_buf_item_done(struct xfs_buf *bp);
-bool   xfs_buf_item_put(struct xfs_buf_log_item *);
+void   xfs_buf_item_put(struct xfs_buf_log_item *);
  void   xfs_buf_item_log(struct xfs_buf_log_item *, uint, uint);
  bool   xfs_buf_item_dirty_format(struct xfs_buf_log_item *);
  void   xfs_buf_inode_iodone(struct xfs_buf *);
author	Dave Chinner <dchinner@redhat.com>
	Fri, 23 May 2025 04:47:07 +0000 (06:47 +0200)
committer	Christoph Hellwig <hch@lst.de>
	Fri, 23 May 2025 04:48:32 +0000 (06:48 +0200)
fs/xfs/xfs_buf_item.c		patch \| blob \| history
fs/xfs/xfs_buf_item.h		patch \| blob \| history