www.infradead.org Git - users/hch/xfs.git/commitdiff
xfs: generalize the freespace and reserved blocks handling
authorChristoph Hellwig <hch@lst.de>
Mon, 16 Dec 2024 07:10:35 +0000 (08:10 +0100)
committerChristoph Hellwig <hch@lst.de>
Mon, 3 Feb 2025 04:49:07 +0000 (05:49 +0100)
The main handling of the incore per-cpu freespace counters is already
handled in xfs_mod_freecounter for both the block and RT extent cases,
but the actual counter is passed in and special cased.

Replace both the percpu counters and the resblks counters with arrays,
so that reserved RT extents can be supported, which will be
needed for garbage collection on zoned devices.

Use helpers to access the freespace counters everywhere instead of
poking through the abstraction by using the percpu_count helpers
directly.  This also switches the flooring of the frextents counter
to 0 in statfs for the rthinherit case to a manual min_t call to match
the handling of the fdblocks counter for normal file systems.

Signed-off-by: Christoph Hellwig <hch@lst.de>
16 files changed:
fs/xfs/libxfs/xfs_ialloc.c
fs/xfs/libxfs/xfs_metafile.c
fs/xfs/libxfs/xfs_sb.c
fs/xfs/scrub/fscounters.c
fs/xfs/scrub/fscounters_repair.c
fs/xfs/scrub/newbt.c
fs/xfs/xfs_fsops.c
fs/xfs/xfs_fsops.h
fs/xfs/xfs_icache.c
fs/xfs/xfs_ioctl.c
fs/xfs/xfs_iomap.c
fs/xfs/xfs_mount.c
fs/xfs/xfs_mount.h
fs/xfs/xfs_rtalloc.c
fs/xfs/xfs_super.c
fs/xfs/xfs_trace.h

index f3a840a425f5101982cba9fd902333143814b027..57513ba19d6a71648bf72b1461c27c44c38e325c 100644 (file)
@@ -1927,7 +1927,7 @@ xfs_dialloc(
         * that we can immediately allocate, but then we allow allocation on the
         * second pass if we fail to find an AG with free inodes in it.
         */
-       if (percpu_counter_read_positive(&mp->m_fdblocks) <
+       if (xfs_estimate_freecounter(mp, XC_FREE_BLOCKS) <
                        mp->m_low_space[XFS_LOWSP_1_PCNT]) {
                ok_alloc = false;
                low_space = true;
index 2f5f554a36d4c9e08e71174af7182fd3f8ff10f6..7625e694eb8dffbdd39e009d5d9cb9d278c7e83a 100644 (file)
@@ -95,7 +95,7 @@ xfs_metafile_resv_can_cover(
         * There aren't enough blocks left in the inode's reservation, but it
         * isn't critical unless there also isn't enough free space.
         */
-       return __percpu_counter_compare(&ip->i_mount->m_fdblocks,
+       return xfs_compare_freecounter(ip->i_mount, XC_FREE_BLOCKS,
                        rhs - ip->i_delayed_blks, 2048) >= 0;
 }
 
index 3dc5f5dba162d09eaa07b6e03e86fd0241d495d0..80e383f641551e5917b019518a34f0967d5fc948 100644 (file)
@@ -1265,8 +1265,7 @@ xfs_log_sb(
                mp->m_sb.sb_ifree = min_t(uint64_t,
                                percpu_counter_sum_positive(&mp->m_ifree),
                                mp->m_sb.sb_icount);
-               mp->m_sb.sb_fdblocks =
-                               percpu_counter_sum_positive(&mp->m_fdblocks);
+               mp->m_sb.sb_fdblocks = xfs_sum_freecounter(mp, XC_FREE_BLOCKS);
        }
 
        /*
@@ -1275,9 +1274,10 @@ xfs_log_sb(
         * we handle nearly-lockless reservations, so we must use the _positive
         * variant here to avoid writing out nonsense frextents.
         */
-       if (xfs_has_rtgroups(mp))
+       if (xfs_has_rtgroups(mp)) {
                mp->m_sb.sb_frextents =
-                               percpu_counter_sum_positive(&mp->m_frextents);
+                       xfs_sum_freecounter(mp, XC_FREE_RTEXTENTS);
+       }
 
        xfs_sb_to_disk(bp->b_addr, &mp->m_sb);
        xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF);
index ca23cf4db6c5ef727ce046c25cb5d0e8b3444359..22cac61172ee27f4c3359343eb4a9d77a0d3f493 100644 (file)
@@ -350,7 +350,7 @@ retry:
         * The global incore space reservation is taken from the incore
         * counters, so leave that out of the computation.
         */
-       fsc->fdblocks -= mp->m_resblks_avail;
+       fsc->fdblocks -= mp->m_resblks[XC_FREE_BLOCKS].avail;
 
        /*
         * Delayed allocation reservations are taken out of the incore counters
@@ -513,8 +513,8 @@ xchk_fscounters(
        /* Snapshot the percpu counters. */
        icount = percpu_counter_sum(&mp->m_icount);
        ifree = percpu_counter_sum(&mp->m_ifree);
-       fdblocks = percpu_counter_sum(&mp->m_fdblocks);
-       frextents = percpu_counter_sum(&mp->m_frextents);
+       fdblocks = xfs_sum_freecounter_raw(mp, XC_FREE_BLOCKS);
+       frextents = xfs_sum_freecounter_raw(mp, XC_FREE_RTEXTENTS);
 
        /* No negative values, please! */
        if (icount < 0 || ifree < 0)
@@ -589,15 +589,16 @@ xchk_fscounters(
                        try_again = true;
        }
 
-       if (!xchk_fscount_within_range(sc, fdblocks, &mp->m_fdblocks,
-                       fsc->fdblocks)) {
+       if (!xchk_fscount_within_range(sc, fdblocks,
+                       &mp->m_free[XC_FREE_BLOCKS], fsc->fdblocks)) {
                if (fsc->frozen)
                        xchk_set_corrupt(sc);
                else
                        try_again = true;
        }
 
-       if (!xchk_fscount_within_range(sc, frextents, &mp->m_frextents,
+       if (!xchk_fscount_within_range(sc, frextents,
+                       &mp->m_free[XC_FREE_RTEXTENTS],
                        fsc->frextents - fsc->frextents_delayed)) {
                if (fsc->frozen)
                        xchk_set_corrupt(sc);
index cda13447a373e14240acbd3f33d153138e8e141e..8fb0db78489eeffb06b037edffd782959f4f60d2 100644 (file)
@@ -64,7 +64,7 @@ xrep_fscounters(
 
        percpu_counter_set(&mp->m_icount, fsc->icount);
        percpu_counter_set(&mp->m_ifree, fsc->ifree);
-       percpu_counter_set(&mp->m_fdblocks, fsc->fdblocks);
+       xfs_set_freecounter(mp, XC_FREE_BLOCKS, fsc->fdblocks);
 
        /*
         * Online repair is only supported on v5 file systems, which require
@@ -74,7 +74,7 @@ xrep_fscounters(
         * track of the delalloc reservations separately, as they are
         * subtracted from m_frextents, but not included in sb_frextents.
         */
-       percpu_counter_set(&mp->m_frextents,
+       xfs_set_freecounter(mp, XC_FREE_RTEXTENTS,
                fsc->frextents - fsc->frextents_delayed);
        if (!xfs_has_rtgroups(mp))
                mp->m_sb.sb_frextents = fsc->frextents;
index ac38f584309029990b9226c22903608e9b26ad50..1588ce971cb8e12543fe392525cd3f139e25e721 100644 (file)
@@ -62,7 +62,7 @@ xrep_newbt_estimate_slack(
                free = sc->sa.pag->pagf_freeblks;
                sz = xfs_ag_block_count(sc->mp, pag_agno(sc->sa.pag));
        } else {
-               free = percpu_counter_sum(&sc->mp->m_fdblocks);
+               free = xfs_sum_freecounter_raw(sc->mp, XC_FREE_BLOCKS);
                sz = sc->mp->m_sb.sb_dblocks;
        }
 
index 455298503d010209a7cdfbff8f0af81309c66a4c..73275d7a6ec0d80a6ebb05bdf278c02e1d475898 100644 (file)
@@ -366,6 +366,7 @@ xfs_growfs_log(
 int
 xfs_reserve_blocks(
        struct xfs_mount        *mp,
+       enum xfs_free_counter   ctr,
        uint64_t                request)
 {
        int64_t                 lcounter, delta;
@@ -373,6 +374,8 @@ xfs_reserve_blocks(
        int64_t                 free;
        int                     error = 0;
 
+       ASSERT(ctr < XC_FREE_NR);
+
        /*
         * With per-cpu counters, this becomes an interesting problem. we need
         * to work out if we are freeing or allocation blocks first, then we can
@@ -391,16 +394,16 @@ xfs_reserve_blocks(
         * counters directly since we shouldn't have any problems unreserving
         * space.
         */
-       if (mp->m_resblks > request) {
-               lcounter = mp->m_resblks_avail - request;
+       if (mp->m_resblks[ctr].total > request) {
+               lcounter = mp->m_resblks[ctr].avail - request;
                if (lcounter > 0) {             /* release unused blocks */
                        fdblks_delta = lcounter;
-                       mp->m_resblks_avail -= lcounter;
+                       mp->m_resblks[ctr].avail -= lcounter;
                }
-               mp->m_resblks = request;
+               mp->m_resblks[ctr].total = request;
                if (fdblks_delta) {
                        spin_unlock(&mp->m_sb_lock);
-                       xfs_add_fdblocks(mp, fdblks_delta);
+                       xfs_add_freecounter(mp, ctr, fdblks_delta);
                        spin_lock(&mp->m_sb_lock);
                }
 
@@ -409,7 +412,7 @@ xfs_reserve_blocks(
 
        /*
         * If the request is larger than the current reservation, reserve the
-        * blocks before we update the reserve counters. Sample m_fdblocks and
+        * blocks before we update the reserve counters. Sample m_free and
         * perform a partial reservation if the request exceeds free space.
         *
         * The code below estimates how many blocks it can request from
@@ -419,10 +422,10 @@ xfs_reserve_blocks(
         * space to fill it because mod_fdblocks will refill an undersized
         * reserve when it can.
         */
-       free = percpu_counter_sum(&mp->m_fdblocks) -
-                                               xfs_fdblocks_unavailable(mp);
-       delta = request - mp->m_resblks;
-       mp->m_resblks = request;
+       free = xfs_sum_freecounter_raw(mp, ctr) -
+               xfs_freecounter_unavailable(mp, ctr);
+       delta = request - mp->m_resblks[ctr].total;
+       mp->m_resblks[ctr].total = request;
        if (delta > 0 && free > 0) {
                /*
                 * We'll either succeed in getting space from the free block
@@ -436,9 +439,9 @@ xfs_reserve_blocks(
                 */
                fdblks_delta = min(free, delta);
                spin_unlock(&mp->m_sb_lock);
-               error = xfs_dec_fdblocks(mp, fdblks_delta, 0);
+               error = xfs_dec_freecounter(mp, ctr, fdblks_delta, 0);
                if (!error)
-                       xfs_add_fdblocks(mp, fdblks_delta);
+                       xfs_add_freecounter(mp, ctr, fdblks_delta);
                spin_lock(&mp->m_sb_lock);
        }
 out:
index 3e2f73bcf8314b87520cbb7ae0bf6542f756b5d5..9d23c361ef56e4ebc5b00787c90f21567e3c145b 100644 (file)
@@ -8,7 +8,8 @@
 
 int xfs_growfs_data(struct xfs_mount *mp, struct xfs_growfs_data *in);
 int xfs_growfs_log(struct xfs_mount *mp, struct xfs_growfs_log *in);
-int xfs_reserve_blocks(struct xfs_mount *mp, uint64_t request);
+int xfs_reserve_blocks(struct xfs_mount *mp, enum xfs_free_counter cnt,
+               uint64_t request);
 int xfs_fs_goingdown(struct xfs_mount *mp, uint32_t inflags);
 
 int xfs_fs_reserve_ag_blocks(struct xfs_mount *mp);
index 7b6c026d01a1fc020a41a678964cdbf7a8113323..c9ded501e89b2e813b99ffef5e4ab661d880ce1e 100644 (file)
@@ -2076,7 +2076,7 @@ xfs_inodegc_want_queue_rt_file(
        if (!XFS_IS_REALTIME_INODE(ip))
                return false;
 
-       if (__percpu_counter_compare(&mp->m_frextents,
+       if (xfs_compare_freecounter(mp, XC_FREE_RTEXTENTS,
                                mp->m_low_rtexts[XFS_LOWSP_5_PCNT],
                                XFS_FDBLOCKS_BATCH) < 0)
                return true;
@@ -2104,7 +2104,7 @@ xfs_inodegc_want_queue_work(
        if (items > mp->m_ino_geo.inodes_per_cluster)
                return true;
 
-       if (__percpu_counter_compare(&mp->m_fdblocks,
+       if (xfs_compare_freecounter(mp, XC_FREE_BLOCKS,
                                mp->m_low_space[XFS_LOWSP_5_PCNT],
                                XFS_FDBLOCKS_BATCH) < 0)
                return true;
index ed85322507ddcc871d26b4a39698714f6d5c96ae..e7f2121f5b6225f9aaffe4855410efe5e6064a5c 100644 (file)
@@ -1131,15 +1131,15 @@ xfs_ioctl_getset_resblocks(
                error = mnt_want_write_file(filp);
                if (error)
                        return error;
-               error = xfs_reserve_blocks(mp, fsop.resblks);
+               error = xfs_reserve_blocks(mp, XC_FREE_BLOCKS, fsop.resblks);
                mnt_drop_write_file(filp);
                if (error)
                        return error;
        }
 
        spin_lock(&mp->m_sb_lock);
-       fsop.resblks = mp->m_resblks;
-       fsop.resblks_avail = mp->m_resblks_avail;
+       fsop.resblks = mp->m_resblks[XC_FREE_BLOCKS].total;
+       fsop.resblks_avail = mp->m_resblks[XC_FREE_BLOCKS].avail;
        spin_unlock(&mp->m_sb_lock);
 
        if (copy_to_user(arg, &fsop, sizeof(fsop)))
@@ -1155,9 +1155,9 @@ xfs_ioctl_fs_counts(
        struct xfs_fsop_counts  out = {
                .allocino = percpu_counter_read_positive(&mp->m_icount),
                .freeino  = percpu_counter_read_positive(&mp->m_ifree),
-               .freedata = percpu_counter_read_positive(&mp->m_fdblocks) -
-                               xfs_fdblocks_unavailable(mp),
-               .freertx  = percpu_counter_read_positive(&mp->m_frextents),
+               .freedata = xfs_estimate_freecounter(mp, XC_FREE_BLOCKS) -
+                               xfs_freecounter_unavailable(mp, XC_FREE_BLOCKS),
+               .freertx  = xfs_estimate_freecounter(mp, XC_FREE_RTEXTENTS),
        };
 
        if (copy_to_user(uarg, &out, sizeof(out)))
index b3783d7b8ebecf988c408248dcabb9747dbf6de6..f3f4b5c328c3dfd95919beebb5f3622f6418df14 100644 (file)
@@ -432,13 +432,14 @@ xfs_quota_calc_throttle(
 
 static int64_t
 xfs_iomap_freesp(
-       struct percpu_counter   *counter,
+       struct xfs_mount        *mp,
+       unsigned int            idx,
        uint64_t                low_space[XFS_LOWSP_MAX],
        int                     *shift)
 {
        int64_t                 freesp;
 
-       freesp = percpu_counter_read_positive(counter);
+       freesp = xfs_estimate_freecounter(mp, idx);
        if (freesp < low_space[XFS_LOWSP_5_PCNT]) {
                *shift = 2;
                if (freesp < low_space[XFS_LOWSP_4_PCNT])
@@ -537,10 +538,10 @@ xfs_iomap_prealloc_size(
 
        if (unlikely(XFS_IS_REALTIME_INODE(ip)))
                freesp = xfs_rtbxlen_to_blen(mp,
-                               xfs_iomap_freesp(&mp->m_frextents,
+                               xfs_iomap_freesp(mp, XC_FREE_RTEXTENTS,
                                        mp->m_low_rtexts, &shift));
        else
-               freesp = xfs_iomap_freesp(&mp->m_fdblocks, mp->m_low_space,
+               freesp = xfs_iomap_freesp(mp, XC_FREE_BLOCKS, mp->m_low_space,
                                &shift);
 
        /*
index 477c5262cf912074e637a04010c24e10ab6a1259..b81a03b3133daeca495c5b9c84f406a134a1ac1d 100644 (file)
@@ -1056,7 +1056,8 @@ xfs_mountfs(
         * we were already there on the last unmount. Warn if this occurs.
         */
        if (!xfs_is_readonly(mp)) {
-               error = xfs_reserve_blocks(mp, xfs_default_resblks(mp));
+               error = xfs_reserve_blocks(mp, XC_FREE_BLOCKS,
+                               xfs_default_resblks(mp));
                if (error)
                        xfs_warn(mp,
        "Unable to allocate reserve blocks. Continuing without reserve pool.");
@@ -1176,7 +1177,7 @@ xfs_unmountfs(
         * we only every apply deltas to the superblock and hence the incore
         * value does not matter....
         */
-       error = xfs_reserve_blocks(mp, 0);
+       error = xfs_reserve_blocks(mp, XC_FREE_BLOCKS, 0);
        if (error)
                xfs_warn(mp, "Unable to free reserved block pool. "
                                "Freespace may not be correct on next mount.");
@@ -1223,52 +1224,68 @@ xfs_fs_writable(
        return true;
 }
 
+/*
+ * Estimate the amount of free space that is not available to userspace and is
+ * not explicitly reserved from the incore fdblocks.  This includes:
+ *
+ * - The minimum number of blocks needed to support splitting a bmap btree
+ * - The blocks currently in use by the freespace btrees because they record
+ *   the actual blocks that will fill per-AG metadata space reservations
+ */
+uint64_t
+xfs_freecounter_unavailable(
+       struct xfs_mount        *mp,
+       enum xfs_free_counter   ctr)
+{
+       if (ctr == XC_FREE_RTEXTENTS)
+               return 0;
+       return mp->m_alloc_set_aside + atomic64_read(&mp->m_allocbt_blks);
+}
+
 void
 xfs_add_freecounter(
        struct xfs_mount        *mp,
-       struct percpu_counter   *counter,
+       enum xfs_free_counter   ctr,
        uint64_t                delta)
 {
-       bool                    has_resv_pool = (counter == &mp->m_fdblocks);
        uint64_t                res_used;
 
        /*
         * If the reserve pool is depleted, put blocks back into it first.
         * Most of the time the pool is full.
         */
-       if (!has_resv_pool || mp->m_resblks == mp->m_resblks_avail) {
-               percpu_counter_add(counter, delta);
+       if (likely(mp->m_resblks[ctr].total == mp->m_resblks[ctr].avail)) {
+               percpu_counter_add(&mp->m_free[ctr], delta);
                return;
        }
 
        spin_lock(&mp->m_sb_lock);
-       res_used = mp->m_resblks - mp->m_resblks_avail;
+       res_used = mp->m_resblks[ctr].total - mp->m_resblks[ctr].avail;
        if (res_used > delta) {
-               mp->m_resblks_avail += delta;
+               mp->m_resblks[ctr].avail += delta;
        } else {
                delta -= res_used;
-               mp->m_resblks_avail = mp->m_resblks;
-               percpu_counter_add(counter, delta);
+               mp->m_resblks[ctr].avail = mp->m_resblks[ctr].total;
+               percpu_counter_add(&mp->m_free[ctr], delta);
        }
        spin_unlock(&mp->m_sb_lock);
 }
 
+
+/* Adjust in-core free blocks or RT extents. */
 int
 xfs_dec_freecounter(
        struct xfs_mount        *mp,
-       struct percpu_counter   *counter,
+       enum xfs_free_counter   ctr,
        uint64_t                delta,
        bool                    rsvd)
 {
+       struct percpu_counter   *counter = &mp->m_free[ctr];
        int64_t                 lcounter;
-       uint64_t                set_aside = 0;
+       uint64_t                set_aside;
        s32                     batch;
-       bool                    has_resv_pool;
 
-       ASSERT(counter == &mp->m_fdblocks || counter == &mp->m_frextents);
-       has_resv_pool = (counter == &mp->m_fdblocks);
-       if (rsvd)
-               ASSERT(has_resv_pool);
+       ASSERT(ctr < XC_FREE_NR);
 
        /*
         * Taking blocks away, need to be more accurate the closer we
@@ -1295,8 +1312,7 @@ xfs_dec_freecounter(
         * problems (i.e. transaction abort, pagecache discards, etc.) than
         * slightly premature -ENOSPC.
         */
-       if (has_resv_pool)
-               set_aside = xfs_fdblocks_unavailable(mp);
+       set_aside = xfs_freecounter_unavailable(mp, ctr);
        percpu_counter_add_batch(counter, -((int64_t)delta), batch);
        if (__percpu_counter_compare(counter, set_aside,
                                     XFS_FDBLOCKS_BATCH) >= 0) {
@@ -1310,12 +1326,12 @@ xfs_dec_freecounter(
         */
        spin_lock(&mp->m_sb_lock);
        percpu_counter_add(counter, delta);
-       if (!has_resv_pool || !rsvd)
+       if (!rsvd)
                goto fdblocks_enospc;
 
-       lcounter = (long long)mp->m_resblks_avail - delta;
+       lcounter = (long long)mp->m_resblks[ctr].avail - delta;
        if (lcounter >= 0) {
-               mp->m_resblks_avail = lcounter;
+               mp->m_resblks[ctr].avail = lcounter;
                spin_unlock(&mp->m_sb_lock);
                return 0;
        }
index fbed172d677087c070a841fdb2dcfc5064620f24..300ffefb2abdb0519cc673b5a84210bb494e093e 100644 (file)
@@ -105,6 +105,19 @@ struct xfs_groups {
        uint64_t                blkmask;
 };
 
+enum xfs_free_counter {
+       /*
+        * Number of free blocks on the data device.
+        */
+       XC_FREE_BLOCKS,
+
+       /*
+        * Number of free RT extents on the RT device.
+        */
+       XC_FREE_RTEXTENTS,
+       XC_FREE_NR,
+};
+
 /*
  * The struct xfsmount layout is optimised to separate read-mostly variables
  * from variables that are frequently modified. We put the read-mostly variables
@@ -222,8 +235,7 @@ typedef struct xfs_mount {
        spinlock_t ____cacheline_aligned m_sb_lock; /* sb counter lock */
        struct percpu_counter   m_icount;       /* allocated inodes counter */
        struct percpu_counter   m_ifree;        /* free inodes counter */
-       struct percpu_counter   m_fdblocks;     /* free block counter */
-       struct percpu_counter   m_frextents;    /* free rt extent counter */
+       struct percpu_counter   m_free[XC_FREE_NR];
 
        /*
         * Count of data device blocks reserved for delayed allocations,
@@ -245,9 +257,11 @@ typedef struct xfs_mount {
        atomic64_t              m_allocbt_blks;
 
        struct xfs_groups       m_groups[XG_TYPE_MAX];
-       uint64_t                m_resblks;      /* total reserved blocks */
-       uint64_t                m_resblks_avail;/* available reserved blocks */
-       uint64_t                m_resblks_save; /* reserved blks @ remount,ro */
+       struct {
+               uint64_t        total;          /* total reserved blocks */
+               uint64_t        avail;          /* available reserved blocks */
+               uint64_t        save;           /* reserved blks @ remount,ro */
+       } m_resblks[XC_FREE_NR];
        struct delayed_work     m_reclaim_work; /* background inode reclaim */
        struct dentry           *m_debugfs;     /* debugfs parent */
        struct xfs_kobj         m_kobj;
@@ -646,45 +660,74 @@ extern void       xfs_unmountfs(xfs_mount_t *);
  */
 #define XFS_FDBLOCKS_BATCH     1024
 
+uint64_t xfs_freecounter_unavailable(struct xfs_mount *mp,
+               enum xfs_free_counter ctr);
+
 /*
- * Estimate the amount of free space that is not available to userspace and is
- * not explicitly reserved from the incore fdblocks.  This includes:
- *
- * - The minimum number of blocks needed to support splitting a bmap btree
- * - The blocks currently in use by the freespace btrees because they record
- *   the actual blocks that will fill per-AG metadata space reservations
+ * Sum up the freecount, but never return negative values.
  */
-static inline uint64_t
-xfs_fdblocks_unavailable(
-       struct xfs_mount        *mp)
+static inline s64 xfs_sum_freecounter(struct xfs_mount *mp,
+               enum xfs_free_counter ctr)
+{
+       return percpu_counter_sum_positive(&mp->m_free[ctr]);
+}
+
+/*
+ * Same as above, but does return negative values.  Mostly useful for
+ * special cases like repair and tracing.
+ */
+static inline s64 xfs_sum_freecounter_raw(struct xfs_mount *mp,
+               enum xfs_free_counter ctr)
+{
+       return percpu_counter_sum(&mp->m_free[ctr]);
+}
+
+/*
+ * This just provides an estimate without the cpu-local updates, use
+ * xfs_sum_freecounter for the exact value.
+ */
+static inline s64 xfs_estimate_freecounter(struct xfs_mount *mp,
+               enum xfs_free_counter ctr)
+{
+       return percpu_counter_read_positive(&mp->m_free[ctr]);
+}
+
+static inline int xfs_compare_freecounter(struct xfs_mount *mp,
+               enum xfs_free_counter ctr, s64 rhs, s32 batch)
+{
+       return __percpu_counter_compare(&mp->m_free[ctr], rhs, batch);
+}
+
+static inline void xfs_set_freecounter(struct xfs_mount *mp,
+               enum xfs_free_counter ctr, uint64_t val)
 {
-       return mp->m_alloc_set_aside + atomic64_read(&mp->m_allocbt_blks);
+       percpu_counter_set(&mp->m_free[ctr], val);
 }
 
-int xfs_dec_freecounter(struct xfs_mount *mp, struct percpu_counter *counter,
+int xfs_dec_freecounter(struct xfs_mount *mp, enum xfs_free_counter ctr,
                uint64_t delta, bool rsvd);
-void xfs_add_freecounter(struct xfs_mount *mp, struct percpu_counter *counter,
+void xfs_add_freecounter(struct xfs_mount *mp, enum xfs_free_counter ctr,
                uint64_t delta);
 
 static inline int xfs_dec_fdblocks(struct xfs_mount *mp, uint64_t delta,
                bool reserved)
 {
-       return xfs_dec_freecounter(mp, &mp->m_fdblocks, delta, reserved);
+       return xfs_dec_freecounter(mp, XC_FREE_BLOCKS, delta, reserved);
 }
 
 static inline void xfs_add_fdblocks(struct xfs_mount *mp, uint64_t delta)
 {
-       xfs_add_freecounter(mp, &mp->m_fdblocks, delta);
+       xfs_add_freecounter(mp, XC_FREE_BLOCKS, delta);
 }
 
 static inline int xfs_dec_frextents(struct xfs_mount *mp, uint64_t delta)
 {
-       return xfs_dec_freecounter(mp, &mp->m_frextents, delta, false);
+       return xfs_dec_freecounter(mp, XC_FREE_RTEXTENTS, delta, false);
 }
 
 static inline void xfs_add_frextents(struct xfs_mount *mp, uint64_t delta)
 {
-       xfs_add_freecounter(mp, &mp->m_frextents, delta);
+       xfs_add_freecounter(mp, XC_FREE_RTEXTENTS, delta);
 }
 
 extern int     xfs_readsb(xfs_mount_t *, int);
index bc18b694db7547af6f8310d9f5818797a5ac761b..8da2498417f5039af3783ccdd04216d4ce9bd2c8 100644 (file)
@@ -1519,7 +1519,7 @@ xfs_rtalloc_reinit_frextents(
        spin_lock(&mp->m_sb_lock);
        mp->m_sb.sb_frextents = val;
        spin_unlock(&mp->m_sb_lock);
-       percpu_counter_set(&mp->m_frextents, mp->m_sb.sb_frextents);
+       xfs_set_freecounter(mp, XC_FREE_RTEXTENTS, mp->m_sb.sb_frextents);
        return 0;
 }
 
index d92d7a07ea89b8781494ed35e87635602266b4a1..f0b0d8320c510bb42eac78ad15ae558cb6cf75df 100644 (file)
@@ -834,10 +834,12 @@ xfs_statfs_data(
        struct kstatfs          *st)
 {
        int64_t                 fdblocks =
-               percpu_counter_sum(&mp->m_fdblocks);
+               xfs_sum_freecounter(mp, XC_FREE_BLOCKS);
 
        /* make sure st->f_bfree does not underflow */
-       st->f_bfree = max(0LL, fdblocks - xfs_fdblocks_unavailable(mp));
+       st->f_bfree = max(0LL,
+               fdblocks - xfs_freecounter_unavailable(mp, XC_FREE_BLOCKS));
+
        /*
         * sb_dblocks can change during growfs, but nothing cares about reporting
         * the old or new value during growfs.
@@ -856,7 +858,7 @@ xfs_statfs_rt(
        struct kstatfs          *st)
 {
        st->f_bfree = xfs_rtbxlen_to_blen(mp,
-                       percpu_counter_sum_positive(&mp->m_frextents));
+                       xfs_sum_freecounter(mp, XC_FREE_RTEXTENTS));
        st->f_blocks = mp->m_sb.sb_rblocks;
 }
 
@@ -922,24 +924,32 @@ xfs_fs_statfs(
 }
 
 STATIC void
-xfs_save_resvblks(struct xfs_mount *mp)
+xfs_save_resvblks(
+       struct xfs_mount        *mp)
 {
-       mp->m_resblks_save = mp->m_resblks;
-       xfs_reserve_blocks(mp, 0);
+       enum xfs_free_counter   i;
+
+       for (i = 0; i < XC_FREE_NR; i++) {
+               mp->m_resblks[i].save = mp->m_resblks[i].total;
+               xfs_reserve_blocks(mp, i, 0);
+       }
 }
 
 STATIC void
-xfs_restore_resvblks(struct xfs_mount *mp)
+xfs_restore_resvblks(
+       struct xfs_mount        *mp)
 {
-       uint64_t resblks;
+       uint64_t                resblks;
+       enum xfs_free_counter   i;
 
-       if (mp->m_resblks_save) {
-               resblks = mp->m_resblks_save;
-               mp->m_resblks_save = 0;
-       } else
-               resblks = xfs_default_resblks(mp);
-
-       xfs_reserve_blocks(mp, resblks);
+       for (i = 0; i < XC_FREE_NR; i++) {
+               if (mp->m_resblks[i].save) {
+                       resblks = mp->m_resblks[i].save;
+                       mp->m_resblks[i].save = 0;
+               } else
+                       resblks = xfs_default_resblks(mp);
+               xfs_reserve_blocks(mp, i, resblks);
+       }
 }
 
 /*
@@ -1065,7 +1075,8 @@ static int
 xfs_init_percpu_counters(
        struct xfs_mount        *mp)
 {
-       int             error;
+       int                     error;
+       enum xfs_free_counter   i;
 
        error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL);
        if (error)
@@ -1075,30 +1086,28 @@ xfs_init_percpu_counters(
        if (error)
                goto free_icount;
 
-       error = percpu_counter_init(&mp->m_fdblocks, 0, GFP_KERNEL);
-       if (error)
-               goto free_ifree;
-
        error = percpu_counter_init(&mp->m_delalloc_blks, 0, GFP_KERNEL);
        if (error)
-               goto free_fdblocks;
+               goto free_ifree;
 
        error = percpu_counter_init(&mp->m_delalloc_rtextents, 0, GFP_KERNEL);
        if (error)
                goto free_delalloc;
 
-       error = percpu_counter_init(&mp->m_frextents, 0, GFP_KERNEL);
-       if (error)
-               goto free_delalloc_rt;
+       for (i = 0; i < XC_FREE_NR; i++) {
+               error = percpu_counter_init(&mp->m_free[i], 0, GFP_KERNEL);
+               if (error)
+                       goto free_freecounters;
+       }
 
        return 0;
 
-free_delalloc_rt:
+free_freecounters:
+       while (--i > 0)
+               percpu_counter_destroy(&mp->m_free[i]);
        percpu_counter_destroy(&mp->m_delalloc_rtextents);
 free_delalloc:
        percpu_counter_destroy(&mp->m_delalloc_blks);
-free_fdblocks:
-       percpu_counter_destroy(&mp->m_fdblocks);
 free_ifree:
        percpu_counter_destroy(&mp->m_ifree);
 free_icount:
@@ -1112,24 +1121,26 @@ xfs_reinit_percpu_counters(
 {
        percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount);
        percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree);
-       percpu_counter_set(&mp->m_fdblocks, mp->m_sb.sb_fdblocks);
-       percpu_counter_set(&mp->m_frextents, mp->m_sb.sb_frextents);
+       xfs_set_freecounter(mp, XC_FREE_BLOCKS, mp->m_sb.sb_fdblocks);
+       xfs_set_freecounter(mp, XC_FREE_RTEXTENTS, mp->m_sb.sb_frextents);
 }
 
 static void
 xfs_destroy_percpu_counters(
        struct xfs_mount        *mp)
 {
+       enum xfs_free_counter   i;
+
+       for (i = 0; i < XC_FREE_NR; i++)
+               percpu_counter_destroy(&mp->m_free[i]);
        percpu_counter_destroy(&mp->m_icount);
        percpu_counter_destroy(&mp->m_ifree);
-       percpu_counter_destroy(&mp->m_fdblocks);
        ASSERT(xfs_is_shutdown(mp) ||
               percpu_counter_sum(&mp->m_delalloc_rtextents) == 0);
        percpu_counter_destroy(&mp->m_delalloc_rtextents);
        ASSERT(xfs_is_shutdown(mp) ||
               percpu_counter_sum(&mp->m_delalloc_blks) == 0);
        percpu_counter_destroy(&mp->m_delalloc_blks);
-       percpu_counter_destroy(&mp->m_frextents);
 }
 
 static int
index b29462363b815f71036f45c2d4f2237297bb7855..7fdcb519cf2f9e1d2b802dbea3e654c6b098618c 100644 (file)
@@ -5621,7 +5621,7 @@ DECLARE_EVENT_CLASS(xfs_metafile_resv_class,
 
                __entry->dev = mp->m_super->s_dev;
                __entry->ino = ip->i_ino;
-               __entry->freeblks = percpu_counter_sum(&mp->m_fdblocks);
+               __entry->freeblks = xfs_sum_freecounter_raw(mp, XC_FREE_BLOCKS);
                __entry->reserved = ip->i_delayed_blks;
                __entry->asked = ip->i_meta_resv_asked;
                __entry->used = ip->i_nblocks;