www.infradead.org Git - users/hch/misc.git/commitdiff
xfs: don't include bnobt blocks when reserving free block pool
author: Darrick J. Wong <djwong@kernel.org>
Wed, 16 Mar 2022 18:54:18 +0000 (11:54 -0700)
committer: Darrick J. Wong <djwong@kernel.org>
Mon, 28 Mar 2022 15:38:43 +0000 (08:38 -0700)
xfs_reserve_blocks controls the size of the user-visible free space
reserve pool.  Given the difference between the current and requested
pool sizes, it will try to reserve free space from fdblocks.  However,
the amount requested from fdblocks is also constrained by the amount of
space that we think xfs_mod_fdblocks will give us.  If we forget to
subtract m_allocbt_blks before calling xfs_mod_fdblocks, it will
return ENOSPC and we'll hang the kernel at mount due to the infinite
loop.

In commit fd43cf600cf6, we decided that xfs_mod_fdblocks should not hand
out the "free space" used by the free space btrees, because some portion
of the free space btrees holds space in reserve for future btree
expansion.  Unfortunately, xfs_reserve_blocks' estimation of the number
of blocks that it could request from xfs_mod_fdblocks was not updated to
include m_allocbt_blks, so if space is extremely low, the caller hangs.

Fix this by creating a function to estimate the number of blocks that
can be reserved from fdblocks, which needs to exclude the set-aside and
m_allocbt_blks.

Found by running xfs/306 (which formats a single-AG 20MB filesystem)
with an fstests configuration that specifies a 1k blocksize and a
specially crafted log size that will consume 7/8 of the space (17920
blocks, specifically) in that AG.

Cc: Brian Foster <bfoster@redhat.com>
Fixes: fd43cf600cf6 ("xfs: set aside allocation btree blocks from block reservation")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
fs/xfs/xfs_fsops.c
fs/xfs/xfs_mount.c
fs/xfs/xfs_mount.h

index 33e26690a8c4fc1d3722b2eabe51245cd87d6aa5..710e857bb825f25bfc840939609a380268270c4e 100644 (file)
@@ -434,7 +434,7 @@ xfs_reserve_blocks(
        error = -ENOSPC;
        do {
                free = percpu_counter_sum(&mp->m_fdblocks) -
-                                               mp->m_alloc_set_aside;
+                                               xfs_fdblocks_unavailable(mp);
                if (free <= 0)
                        break;
 
index bed73e8002a51bc0bdebe2922faa43857438971f..29ffa8c427954af7b99b6b4725f966161c995048 100644 (file)
@@ -1146,7 +1146,7 @@ xfs_mod_fdblocks(
         * problems (i.e. transaction abort, pagecache discards, etc.) than
         * slightly premature -ENOSPC.
         */
-       set_aside = mp->m_alloc_set_aside + atomic64_read(&mp->m_allocbt_blks);
+       set_aside = xfs_fdblocks_unavailable(mp);
        percpu_counter_add_batch(&mp->m_fdblocks, delta, batch);
        if (__percpu_counter_compare(&mp->m_fdblocks, set_aside,
                                     XFS_FDBLOCKS_BATCH) >= 0) {
index 00720a02e7615198454b8296047aa98d0299859c..f6dc19de8322a6f104672bb61d53b8983e9b8566 100644 (file)
@@ -479,6 +479,21 @@ extern void        xfs_unmountfs(xfs_mount_t *);
  */
 #define XFS_FDBLOCKS_BATCH     1024
 
+/*
+ * Estimate the amount of free space that is not available to userspace and is
+ * not explicitly reserved from the incore fdblocks.  This includes:
+ *
+ * - The minimum number of blocks needed to support splitting a bmap btree
+ * - The blocks currently in use by the freespace btrees because they record
+ *   the actual blocks that will fill per-AG metadata space reservations
+ *
+ * Returns mp->m_alloc_set_aside plus the value of mp->m_allocbt_blks as read
+ * by atomic64_read() at the time of the call.  Callers subtract the result
+ * from, or compare it against, the m_fdblocks counter to decide how many
+ * blocks can actually be handed out.
+ */
+static inline uint64_t
+xfs_fdblocks_unavailable(
+       struct xfs_mount        *mp)
+{
+       return mp->m_alloc_set_aside + atomic64_read(&mp->m_allocbt_blks);
+}
+
 extern int     xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta,
                                 bool reserved);
 extern int     xfs_mod_frextents(struct xfs_mount *mp, int64_t delta);