xfs: don't keep a reference for buffers on the LRU
author Christoph Hellwig <hch@lst.de>
Sun, 12 Jan 2025 15:38:56 +0000 (16:38 +0100)
committer Christoph Hellwig <hch@lst.de>
Mon, 13 Jan 2025 04:17:38 +0000 (05:17 +0100)
Currently the buffer cache takes an extra b_hold reference for buffers
that are on the LRU.  This seems to go all the way back in history and
allows releasing buffers from the LRU using xfs_buf_rele, but it makes
xfs_buf_rele really complicated and differs from how other LRUs are
implemented in Linux.

Switch to not holding a reference for buffers on the LRU, and use a
separate negative hold value to mark buffers as dead.  This simplifies
xfs_buf_rele, which now only has to deal with dropping the last "real"
reference, and prepares for using the lockref primitive.
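
The end result is a plain reference count with a negative sentinel.
Below is a minimal sketch of the pattern, not the actual XFS code:
struct buf and the buf_* helpers are simplified stand-ins for
struct xfs_buf, list_lru_{add,del}_obj() and xfs_buf_destroy().

#include <linux/spinlock.h>
#include <linux/atomic.h>

/* Hypothetical, trimmed-down buffer just to show the hold-count rules. */
struct buf {
	spinlock_t	lock;
	int		hold;		/* > 0: referenced, 0: only on LRU, -1: dead */
	atomic_t	lru_ref;	/* "keep me cached" hint, like b_lru_ref */
};

/* Stand-ins for the LRU list helpers and the final teardown. */
void buf_lru_add(struct buf *bp);
void buf_lru_del(struct buf *bp);
void buf_destroy(struct buf *bp);

/* Drop a reference; the last one either parks the buffer on the LRU or kills it. */
static void buf_rele(struct buf *bp)
{
	spin_lock(&bp->lock);
	if (!--bp->hold) {
		if (!atomic_read(&bp->lru_ref))
			goto kill;
		/* unreferenced but worth caching: sits on the LRU without a hold */
		buf_lru_add(bp);
	}
	spin_unlock(&bp->lock);
	return;

kill:
	bp->hold = -1;			/* mark dead so lookups cannot resurrect it */
	buf_lru_del(bp);
	spin_unlock(&bp->lock);
	buf_destroy(bp);
}

/* Lookup-side helper: only take a reference if the buffer is not dead yet. */
static bool buf_try_hold(struct buf *bp)
{
	spin_lock(&bp->lock);
	if (bp->hold < 0) {
		spin_unlock(&bp->lock);
		return false;
	}
	bp->hold++;
	spin_unlock(&bp->lock);
	return true;
}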

Signed-off-by: Christoph Hellwig <hch@lst.de>
fs/xfs/xfs_buf.c
fs/xfs/xfs_buf.h

index 034452999fce67ad782ee23cfae259f673744497..b52db029333e6496e4330828bd78119f659bf6b2 100644 (file)
@@ -101,11 +101,8 @@ xfs_buf_stale(
 
        spin_lock(&bp->b_lock);
        atomic_set(&bp->b_lru_ref, 0);
-       if (!(bp->b_state & XFS_BSTATE_DISPOSE) &&
-           (list_lru_del_obj(&bp->b_target->bt_lru, &bp->b_lru)))
-               bp->b_hold--;
-
-       ASSERT(bp->b_hold >= 1);
+       if (bp->b_hold >= 0)
+               list_lru_del_obj(&bp->b_target->bt_lru, &bp->b_lru);
        spin_unlock(&bp->b_lock);
 }
 
@@ -516,7 +513,7 @@ xfs_buf_try_hold(
        struct xfs_buf          *bp)
 {
        spin_lock(&bp->b_lock);
-       if (bp->b_hold == 0) {
+       if (bp->b_hold == -1) {
                spin_unlock(&bp->b_lock);
                return false;
        }
@@ -992,76 +989,24 @@ xfs_buf_hold(
 }
 
 static void
-xfs_buf_rele_uncached(
-       struct xfs_buf          *bp)
-{
-       ASSERT(list_empty(&bp->b_lru));
-
-       spin_lock(&bp->b_lock);
-       if (--bp->b_hold) {
-               spin_unlock(&bp->b_lock);
-               return;
-       }
-       spin_unlock(&bp->b_lock);
-       xfs_buf_free(bp);
-}
-
-static void
-xfs_buf_rele_cached(
+xfs_buf_destroy(
        struct xfs_buf          *bp)
 {
-       struct xfs_buftarg      *btp = bp->b_target;
-       struct xfs_perag        *pag = bp->b_pag;
-       struct xfs_buf_cache    *bch = xfs_buftarg_buf_cache(btp, pag);
-       bool                    freebuf = false;
-
-       trace_xfs_buf_rele(bp, _RET_IP_);
-
-       spin_lock(&bp->b_lock);
-       ASSERT(bp->b_hold >= 1);
-       if (bp->b_hold > 1) {
-               bp->b_hold--;
-               goto out_unlock;
-       }
+       ASSERT(bp->b_hold < 0);
+       ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
 
-       /* we are asked to drop the last reference */
-       if (atomic_read(&bp->b_lru_ref)) {
-               /*
-                * If the buffer is added to the LRU, keep the reference to the
-                * buffer for the LRU and clear the (now stale) dispose list
-                * state flag, else drop the reference.
-                */
-               if (list_lru_add_obj(&btp->bt_lru, &bp->b_lru))
-                       bp->b_state &= ~XFS_BSTATE_DISPOSE;
-               else
-                       bp->b_hold--;
-       } else {
-               bp->b_hold--;
-               /*
-                * most of the time buffers will already be removed from the
-                * LRU, so optimise that case by checking for the
-                * XFS_BSTATE_DISPOSE flag indicating the last list the buffer
-                * was on was the disposal list
-                */
-               if (!(bp->b_state & XFS_BSTATE_DISPOSE)) {
-                       list_lru_del_obj(&btp->bt_lru, &bp->b_lru);
-               } else {
-                       ASSERT(list_empty(&bp->b_lru));
-               }
+       if (!xfs_buf_is_uncached(bp)) {
+               struct xfs_buf_cache    *bch =
+                       xfs_buftarg_buf_cache(bp->b_target, bp->b_pag);
 
-               ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
                rhashtable_remove_fast(&bch->bc_hash, &bp->b_rhash_head,
                                xfs_buf_hash_params);
-               if (pag)
-                       xfs_perag_put(pag);
-               freebuf = true;
-       }
 
-out_unlock:
-       spin_unlock(&bp->b_lock);
+               if (bp->b_pag)
+                       xfs_perag_put(bp->b_pag);
+       }
 
-       if (freebuf)
-               xfs_buf_free(bp);
+       xfs_buf_free(bp);
 }
 
 /*
@@ -1072,10 +1017,22 @@ xfs_buf_rele(
        struct xfs_buf          *bp)
 {
        trace_xfs_buf_rele(bp, _RET_IP_);
-       if (xfs_buf_is_uncached(bp))
-               xfs_buf_rele_uncached(bp);
-       else
-               xfs_buf_rele_cached(bp);
+
+       spin_lock(&bp->b_lock);
+       if (!--bp->b_hold) {
+               if (xfs_buf_is_uncached(bp) || !atomic_read(&bp->b_lru_ref))
+                       goto kill;
+               list_lru_add_obj(&bp->b_target->bt_lru, &bp->b_lru);
+       }
+       spin_unlock(&bp->b_lock);
+       return;
+
+kill:
+       bp->b_hold = -1;
+       list_lru_del_obj(&bp->b_target->bt_lru, &bp->b_lru);
+       spin_unlock(&bp->b_lock);
+
+       xfs_buf_destroy(bp);
 }
 
 /*
@@ -1383,9 +1340,11 @@ xfs_buf_ioerror_alert(
 
 /*
  * To simulate an I/O failure, the buffer must be locked and held with at least
- * three references. The LRU reference is dropped by the stale call. The buf
- * item reference is dropped via ioend processing. The third reference is owned
- * by the caller and is dropped on I/O completion if the buffer is XBF_ASYNC.
+ * two references.
+ *
+ * The buf item reference is dropped via ioend processing. The second reference
+ * is owned by the caller and is dropped on I/O completion if the buffer is
+ * XBF_ASYNC.
  */
 void
 xfs_buf_ioend_fail(
@@ -1692,19 +1651,14 @@ xfs_buftarg_drain_rele(
 
        if (!spin_trylock(&bp->b_lock))
                return LRU_SKIP;
-       if (bp->b_hold > 1) {
+       if (bp->b_hold > 0) {
                /* need to wait, so skip it this pass */
                spin_unlock(&bp->b_lock);
                trace_xfs_buf_drain_buftarg(bp, _RET_IP_);
                return LRU_SKIP;
        }
 
-       /*
-        * clear the LRU reference count so the buffer doesn't get
-        * ignored in xfs_buf_rele().
-        */
-       atomic_set(&bp->b_lru_ref, 0);
-       bp->b_state |= XFS_BSTATE_DISPOSE;
+       bp->b_hold = -1;
        list_lru_isolate_move(lru, item, dispose);
        spin_unlock(&bp->b_lock);
        return LRU_REMOVED;
@@ -1759,7 +1713,7 @@ xfs_buftarg_drain(
 "Corruption Alert: Buffer at daddr 0x%llx had permanent write failures!",
                                        (long long)xfs_buf_daddr(bp));
                        }
-                       xfs_buf_rele(bp);
+                       xfs_buf_destroy(bp);
                }
                if (loop++ != 0)
                        delay(100);
@@ -1788,11 +1742,23 @@ xfs_buftarg_isolate(
        struct list_head        *dispose = arg;
 
        /*
-        * we are inverting the lru lock/bp->b_lock here, so use a trylock.
-        * If we fail to get the lock, just skip it.
+        * We are inverting the lru lock vs bp->b_lock order here, so use a
+        * trylock. If we fail to get the lock, just skip the buffer.
         */
        if (!spin_trylock(&bp->b_lock))
                return LRU_SKIP;
+
+       /*
+        * If the buffer is in use, remove it from the LRU for now as we can't
+        * free it.  It will be added to the LRU again when the reference count
+        * hits zero.
+        */
+       if (bp->b_hold > 0) {
+               list_lru_isolate(lru, &bp->b_lru);
+               spin_unlock(&bp->b_lock);
+               return LRU_REMOVED;
+       }
+
        /*
         * Decrement the b_lru_ref count unless the value is already
         * zero. If the value is already zero, we need to reclaim the
@@ -1803,7 +1769,7 @@ xfs_buftarg_isolate(
                return LRU_ROTATE;
        }
 
-       bp->b_state |= XFS_BSTATE_DISPOSE;
+       bp->b_hold = -1;
        list_lru_isolate_move(lru, item, dispose);
        spin_unlock(&bp->b_lock);
        return LRU_REMOVED;
@@ -1825,7 +1791,7 @@ xfs_buftarg_shrink_scan(
                struct xfs_buf *bp;
                bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
                list_del_init(&bp->b_lru);
-               xfs_buf_rele(bp);
+               xfs_buf_destroy(bp);
        }
 
        return freed;
index 604016911ddc6f165a495dff6693e5133471efad..ad7637ae26f52df463bf924984011e1075700ba6 100644 (file)
@@ -71,11 +71,6 @@ typedef unsigned int xfs_buf_flags_t;
        { XBF_TRYLOCK,          "TRYLOCK" }, \
        { XBF_UNMAPPED,         "UNMAPPED" }
 
-/*
- * Internal state flags.
- */
-#define XFS_BSTATE_DISPOSE      (1 << 0)       /* buffer being discarded */
-
 struct xfs_buf_cache {
        struct rhashtable       bc_hash;
 };
@@ -165,7 +160,7 @@ struct xfs_buf {
        xfs_daddr_t             b_rhash_key;    /* buffer cache index */
        int                     b_length;       /* size of buffer in BBs */
        spinlock_t              b_lock;         /* internal state lock */
-       unsigned int            b_hold;         /* reference count */
+       int                     b_hold;         /* reference count */
        atomic_t                b_lru_ref;      /* lru reclaim ref count */
        xfs_buf_flags_t         b_flags;        /* status flags */
        struct semaphore        b_sema;         /* semaphore for lockables */
@@ -175,7 +170,6 @@ struct xfs_buf {
         * bt_lru_lock and not by b_sema
         */
        struct list_head        b_lru;          /* lru list */
-       unsigned int            b_state;        /* internal state flags */
        wait_queue_head_t       b_waiters;      /* unpin waiters */
        struct list_head        b_list;
        struct xfs_perag        *b_pag;