* that we can immediately allocate, but then we allow allocation on the
* second pass if we fail to find an AG with free inodes in it.
*/
- if (percpu_counter_read_positive(&mp->m_fdblocks) <
+ if (xfs_estimate_freecounter(mp, FREE_BLOCKS) <
mp->m_low_space[XFS_LOWSP_1_PCNT]) {
ok_alloc = false;
low_space = true;
* There aren't enough blocks left in the inode's reservation, but it
* isn't critical unless there also isn't enough free space.
*/
- return __percpu_counter_compare(&ip->i_mount->m_fdblocks,
+ return xfs_compare_freecounter(ip->i_mount, FREE_BLOCKS,
rhs - ip->i_delayed_blks, 2048) >= 0;
}
mp->m_sb.sb_ifree = min_t(uint64_t,
percpu_counter_sum_positive(&mp->m_ifree),
mp->m_sb.sb_icount);
- mp->m_sb.sb_fdblocks =
- percpu_counter_sum_positive(&mp->m_fdblocks);
+ mp->m_sb.sb_fdblocks = xfs_sum_freecounter(mp, FREE_BLOCKS);
}
if (xfs_has_rtgroups(mp))
- mp->m_sb.sb_frextents =
- percpu_counter_sum_positive(&mp->m_frextents);
+ mp->m_sb.sb_frextents = xfs_sum_freecounter(mp, FREE_RTEXTENTS);
xfs_sb_to_disk(bp->b_addr, &mp->m_sb);
xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF);
* The global incore space reservation is taken from the incore
* counters, so leave that out of the computation.
*/
- fsc->fdblocks -= mp->m_resblks_avail;
+ fsc->fdblocks -= mp->m_resblks[FREE_BLOCKS].avail;
/*
* Delayed allocation reservations are taken out of the incore counters
/* Snapshot the percpu counters. */
icount = percpu_counter_sum(&mp->m_icount);
ifree = percpu_counter_sum(&mp->m_ifree);
- fdblocks = percpu_counter_sum(&mp->m_fdblocks);
- frextents = percpu_counter_sum(&mp->m_frextents);
+ fdblocks = xfs_sum_freecounter(mp, FREE_BLOCKS);
+ frextents = xfs_sum_freecounter(mp, FREE_RTEXTENTS);
/* No negative values, please! */
if (icount < 0 || ifree < 0)
try_again = true;
}
- if (!xchk_fscount_within_range(sc, fdblocks, &mp->m_fdblocks,
+ if (!xchk_fscount_within_range(sc, fdblocks, &mp->m_free[FREE_BLOCKS],
fsc->fdblocks)) {
if (fsc->frozen)
xchk_set_corrupt(sc);
try_again = true;
}
- if (!xchk_fscount_within_range(sc, frextents, &mp->m_frextents,
+ if (!xchk_fscount_within_range(sc, frextents,
+ &mp->m_free[FREE_RTEXTENTS],
fsc->frextents - fsc->frextents_delayed)) {
if (fsc->frozen)
xchk_set_corrupt(sc);
percpu_counter_set(&mp->m_icount, fsc->icount);
percpu_counter_set(&mp->m_ifree, fsc->ifree);
- percpu_counter_set(&mp->m_fdblocks, fsc->fdblocks);
+ percpu_counter_set(&mp->m_free[FREE_BLOCKS], fsc->fdblocks);
/*
* Online repair is only supported on v5 file systems, which require
* track of the delalloc reservations separately, as they are
* subtracted from m_frextents, but not included in sb_frextents.
*/
- percpu_counter_set(&mp->m_frextents,
+ percpu_counter_set(&mp->m_free[FREE_RTEXTENTS],
fsc->frextents - fsc->frextents_delayed);
if (!xfs_has_rtgroups(mp))
mp->m_sb.sb_frextents = fsc->frextents;
free = sc->sa.pag->pagf_freeblks;
sz = xfs_ag_block_count(sc->mp, sc->sa.pag->pag_agno);
} else {
- free = percpu_counter_sum(&sc->mp->m_fdblocks);
+ free = xfs_sum_freecounter(sc->mp, FREE_BLOCKS);
sz = sc->mp->m_sb.sb_dblocks;
}
int
xfs_reserve_blocks(
struct xfs_mount *mp,
+ unsigned int idx,
uint64_t request)
{
int64_t lcounter, delta;
int64_t free;
int error = 0;
+ ASSERT(idx < FREE_NR);
+
/*
* With per-cpu counters, this becomes an interesting problem. We need
* to work out if we are freeing or allocating blocks first, then we can
* counters directly since we shouldn't have any problems unreserving
* space.
*/
- if (mp->m_resblks > request) {
- lcounter = mp->m_resblks_avail - request;
+ if (mp->m_resblks[idx].total > request) {
+ lcounter = mp->m_resblks[idx].avail - request;
if (lcounter > 0) { /* release unused blocks */
fdblks_delta = lcounter;
- mp->m_resblks_avail -= lcounter;
+ mp->m_resblks[idx].avail -= lcounter;
}
- mp->m_resblks = request;
+ mp->m_resblks[idx].total = request;
if (fdblks_delta) {
spin_unlock(&mp->m_sb_lock);
- xfs_add_fdblocks(mp, fdblks_delta);
+ xfs_add_freecounter(mp, idx, fdblks_delta);
spin_lock(&mp->m_sb_lock);
}
/*
* If the request is larger than the current reservation, reserve the
- * blocks before we update the reserve counters. Sample m_fdblocks and
+ * blocks before we update the reserve counters. Sample m_free and
* perform a partial reservation if the request exceeds free space.
*
* The code below estimates how many blocks it can request from
- * space to fill it because mod_fdblocks will refill an undersized
- * reserve when it can.
+ * space to fill it because xfs_add_freecounter will refill an undersized
+ * reserve when it can.
*/
- free = percpu_counter_sum(&mp->m_fdblocks) -
- xfs_fdblocks_unavailable(mp);
- delta = request - mp->m_resblks;
- mp->m_resblks = request;
+ free = xfs_sum_freecounter(mp, idx) -
+ xfs_freecounter_unavailable(mp, idx);
+ delta = request - mp->m_resblks[idx].total;
+ mp->m_resblks[idx].total = request;
if (delta > 0 && free > 0) {
/*
* We'll either succeed in getting space from the free block
*/
fdblks_delta = min(free, delta);
spin_unlock(&mp->m_sb_lock);
- error = xfs_dec_fdblocks(mp, fdblks_delta, 0);
+ error = xfs_dec_freecounter(mp, idx, fdblks_delta, 0);
if (!error)
- xfs_add_fdblocks(mp, fdblks_delta);
+ xfs_add_freecounter(mp, idx, fdblks_delta);
spin_lock(&mp->m_sb_lock);
}
out:
int xfs_growfs_data(struct xfs_mount *mp, struct xfs_growfs_data *in);
int xfs_growfs_log(struct xfs_mount *mp, struct xfs_growfs_log *in);
-int xfs_reserve_blocks(struct xfs_mount *mp, uint64_t request);
+int xfs_reserve_blocks(struct xfs_mount *mp, unsigned int idx,
+ uint64_t request);
int xfs_fs_goingdown(struct xfs_mount *mp, uint32_t inflags);
int xfs_fs_reserve_ag_blocks(struct xfs_mount *mp);
if (!XFS_IS_REALTIME_INODE(ip))
return false;
- if (__percpu_counter_compare(&mp->m_frextents,
+ if (xfs_compare_freecounter(mp, FREE_RTEXTENTS,
mp->m_low_rtexts[XFS_LOWSP_5_PCNT],
XFS_FDBLOCKS_BATCH) < 0)
return true;
if (items > mp->m_ino_geo.inodes_per_cluster)
return true;
- if (__percpu_counter_compare(&mp->m_fdblocks,
+ if (xfs_compare_freecounter(mp, FREE_BLOCKS,
mp->m_low_space[XFS_LOWSP_5_PCNT],
XFS_FDBLOCKS_BATCH) < 0)
return true;
error = mnt_want_write_file(filp);
if (error)
return error;
- error = xfs_reserve_blocks(mp, fsop.resblks);
+ error = xfs_reserve_blocks(mp, FREE_BLOCKS, fsop.resblks);
mnt_drop_write_file(filp);
if (error)
return error;
}
spin_lock(&mp->m_sb_lock);
- fsop.resblks = mp->m_resblks;
- fsop.resblks_avail = mp->m_resblks_avail;
+ fsop.resblks = mp->m_resblks[FREE_BLOCKS].total;
+ fsop.resblks_avail = mp->m_resblks[FREE_BLOCKS].avail;
spin_unlock(&mp->m_sb_lock);
if (copy_to_user(arg, &fsop, sizeof(fsop)))
struct xfs_fsop_counts out = {
.allocino = percpu_counter_read_positive(&mp->m_icount),
.freeino = percpu_counter_read_positive(&mp->m_ifree),
- .freedata = percpu_counter_read_positive(&mp->m_fdblocks) -
- xfs_fdblocks_unavailable(mp),
- .freertx = percpu_counter_read_positive(&mp->m_frextents),
+ .freedata = xfs_estimate_freecounter(mp, FREE_BLOCKS) -
+ xfs_freecounter_unavailable(mp, FREE_BLOCKS),
+ .freertx = xfs_estimate_freecounter(mp, FREE_RTEXTENTS),
};
if (copy_to_user(uarg, &out, sizeof(out)))
static int64_t
xfs_iomap_freesp(
- struct percpu_counter *counter,
+ struct xfs_mount *mp,
+ unsigned int idx,
uint64_t low_space[XFS_LOWSP_MAX],
int *shift)
{
int64_t freesp;
- freesp = percpu_counter_read_positive(counter);
+ freesp = xfs_estimate_freecounter(mp, idx);
if (freesp < low_space[XFS_LOWSP_5_PCNT]) {
*shift = 2;
if (freesp < low_space[XFS_LOWSP_4_PCNT])
if (unlikely(XFS_IS_REALTIME_INODE(ip)))
freesp = xfs_rtx_to_rtb(mp,
- xfs_iomap_freesp(&mp->m_frextents,
+ xfs_iomap_freesp(mp, FREE_RTEXTENTS,
mp->m_low_rtexts, &shift));
else
- freesp = xfs_iomap_freesp(&mp->m_fdblocks, mp->m_low_space,
+ freesp = xfs_iomap_freesp(mp, FREE_BLOCKS, mp->m_low_space,
&shift);
/*
* we were already there on the last unmount. Warn if this occurs.
*/
if (!xfs_is_readonly(mp)) {
- error = xfs_reserve_blocks(mp, xfs_default_resblks(mp));
+ error = xfs_reserve_blocks(mp, FREE_BLOCKS,
+ xfs_default_resblks(mp));
if (error)
xfs_warn(mp,
"Unable to allocate reserve blocks. Continuing without reserve pool.");
* we only ever apply deltas to the superblock and hence the incore
* value does not matter....
*/
- error = xfs_reserve_blocks(mp, 0);
+ error = xfs_reserve_blocks(mp, FREE_BLOCKS, 0);
if (error)
xfs_warn(mp, "Unable to free reserved block pool. "
"Freespace may not be correct on next mount.");
return true;
}
+/*
+ * Estimate the amount of free space that is not available to userspace and is
+ * not explicitly reserved from the incore fdblocks. This includes:
+ *
+ * - The minimum number of blocks needed to support splitting a bmap btree
+ * - The blocks currently in use by the freespace btrees because they record
+ * the actual blocks that will fill per-AG metadata space reservations
+ */
+uint64_t
+xfs_freecounter_unavailable(
+ struct xfs_mount *mp,
+ unsigned int idx)
+{
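+	/* Nothing is set aside from the free rt extent count. */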
+ if (idx == FREE_RTEXTENTS)
+ return 0;
+ return mp->m_alloc_set_aside + atomic64_read(&mp->m_allocbt_blks);
+}
+
void
xfs_add_freecounter(
struct xfs_mount *mp,
- struct percpu_counter *counter,
+ unsigned int idx,
uint64_t delta)
{
- bool has_resv_pool = (counter == &mp->m_fdblocks);
uint64_t res_used;
/*
* If the reserve pool is depleted, put blocks back into it first.
* Most of the time the pool is full.
*/
- if (!has_resv_pool || mp->m_resblks == mp->m_resblks_avail) {
- percpu_counter_add(counter, delta);
+ if (likely(mp->m_resblks[idx].total == mp->m_resblks[idx].avail)) {
+ percpu_counter_add(&mp->m_free[idx], delta);
return;
}
spin_lock(&mp->m_sb_lock);
- res_used = mp->m_resblks - mp->m_resblks_avail;
+ res_used = mp->m_resblks[idx].total - mp->m_resblks[idx].avail;
if (res_used > delta) {
- mp->m_resblks_avail += delta;
+ mp->m_resblks[idx].avail += delta;
} else {
delta -= res_used;
- mp->m_resblks_avail = mp->m_resblks;
- percpu_counter_add(counter, delta);
+ mp->m_resblks[idx].avail = mp->m_resblks[idx].total;
+ percpu_counter_add(&mp->m_free[idx], delta);
}
spin_unlock(&mp->m_sb_lock);
}
+
+/*
+ * Decrement in-core free blocks or RT extents, dipping into the reserve pool
+ * if @rsvd is set.
+ */
int
xfs_dec_freecounter(
struct xfs_mount *mp,
- struct percpu_counter *counter,
+ unsigned int idx,
uint64_t delta,
bool rsvd)
{
+ struct percpu_counter *counter = &mp->m_free[idx];
int64_t lcounter;
uint64_t set_aside = 0;
s32 batch;
- bool has_resv_pool;
- ASSERT(counter == &mp->m_fdblocks || counter == &mp->m_frextents);
- has_resv_pool = (counter == &mp->m_fdblocks);
- if (rsvd)
- ASSERT(has_resv_pool);
+ ASSERT(idx < FREE_NR);
/*
* Taking blocks away, need to be more accurate the closer we
* problems (i.e. transaction abort, pagecache discards, etc.) than
* slightly premature -ENOSPC.
*/
- if (has_resv_pool)
- set_aside = xfs_fdblocks_unavailable(mp);
+ set_aside = xfs_freecounter_unavailable(mp, idx);
percpu_counter_add_batch(counter, -((int64_t)delta), batch);
if (__percpu_counter_compare(counter, set_aside,
XFS_FDBLOCKS_BATCH) >= 0) {
*/
spin_lock(&mp->m_sb_lock);
percpu_counter_add(counter, delta);
- if (!has_resv_pool || !rsvd)
+ if (!rsvd)
goto fdblocks_enospc;
- lcounter = (long long)mp->m_resblks_avail - delta;
+ lcounter = (long long)mp->m_resblks[idx].avail - delta;
if (lcounter >= 0) {
- mp->m_resblks_avail = lcounter;
+ mp->m_resblks[idx].avail = lcounter;
spin_unlock(&mp->m_sb_lock);
return 0;
}
spinlock_t ____cacheline_aligned m_sb_lock; /* sb counter lock */
struct percpu_counter m_icount; /* allocated inodes counter */
struct percpu_counter m_ifree; /* free inodes counter */
- struct percpu_counter m_fdblocks; /* free block counter */
- struct percpu_counter m_frextents; /* free rt extent counter */
+
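+/* Indices into the m_free[] array of free space counters. */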
+#define FREE_BLOCKS 0 /* free block counter */
+#define FREE_RTEXTENTS 1 /* free rt extent counter */
+#define FREE_NR 2
+ struct percpu_counter m_free[FREE_NR];
/*
* Count of data device blocks reserved for delayed allocations,
struct xarray m_rtgroups; /* per-rt group info */
struct radix_tree_root m_perag_tree; /* per-ag accounting info */
spinlock_t m_perag_lock; /* lock for m_perag_tree */
- uint64_t m_resblks; /* total reserved blocks */
- uint64_t m_resblks_avail;/* available reserved blocks */
- uint64_t m_resblks_save; /* reserved blks @ remount,ro */
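+	/* Reserve pool for each free space counter, protected by m_sb_lock. */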
+ struct {
+ uint64_t total; /* total reserved blocks */
+ uint64_t avail; /* available reserved blocks */
+ uint64_t save; /* reserved blks @ remount,ro */
+ } m_resblks[FREE_NR];
struct delayed_work m_reclaim_work; /* background inode reclaim */
struct dentry *m_debugfs; /* debugfs parent */
struct xfs_kobj m_kobj;
*/
#define XFS_FDBLOCKS_BATCH 1024
+uint64_t xfs_freecounter_unavailable(struct xfs_mount *mp, unsigned int idx);
+
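+/*
+ * Exact sum of the free counter, including all per-cpu deltas.  The result
+ * can be (transiently) negative.
+ */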
+static inline s64 xfs_sum_freecounter(struct xfs_mount *mp, unsigned int idx)
+{
+ return percpu_counter_sum(&mp->m_free[idx]);
+}
+
/*
- * Estimate the amount of free space that is not available to userspace and is
- * not explicitly reserved from the incore fdblocks. This includes:
- *
- * - The minimum number of blocks needed to support splitting a bmap btree
- * - The blocks currently in use by the freespace btrees because they record
- * the actual blocks that will fill per-AG metadata space reservations
+ * This just provides an estimate without the cpu-local updates; use
+ * xfs_sum_freecounter() for the exact value.
*/
-static inline uint64_t
-xfs_fdblocks_unavailable(
- struct xfs_mount *mp)
+static inline s64 xfs_estimate_freecounter(struct xfs_mount *mp,
+ unsigned int idx)
+{
+ return percpu_counter_read_positive(&mp->m_free[idx]);
+}
+
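+/*
+ * Compare the free counter against @rhs, returning -1, 0 or 1.  This only
+ * does the expensive exact sum when the fast per-cpu estimate is too close
+ * to @rhs to be trusted for the given batch size.
+ */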
+static inline int xfs_compare_freecounter(struct xfs_mount *mp,
+ unsigned int idx, s64 rhs, s32 batch)
{
- return mp->m_alloc_set_aside + atomic64_read(&mp->m_allocbt_blks);
+ return __percpu_counter_compare(&mp->m_free[idx], rhs, batch);
}
-int xfs_dec_freecounter(struct xfs_mount *mp, struct percpu_counter *counter,
- uint64_t delta, bool rsvd);
-void xfs_add_freecounter(struct xfs_mount *mp, struct percpu_counter *counter,
+int xfs_dec_freecounter(struct xfs_mount *mp, unsigned int idx, uint64_t delta,
+ bool rsvd);
+void xfs_add_freecounter(struct xfs_mount *mp, unsigned int idx,
uint64_t delta);
static inline int xfs_dec_fdblocks(struct xfs_mount *mp, uint64_t delta,
bool reserved)
{
- return xfs_dec_freecounter(mp, &mp->m_fdblocks, delta, reserved);
+ return xfs_dec_freecounter(mp, FREE_BLOCKS, delta, reserved);
}
static inline void xfs_add_fdblocks(struct xfs_mount *mp, uint64_t delta)
{
- xfs_add_freecounter(mp, &mp->m_fdblocks, delta);
+ xfs_add_freecounter(mp, FREE_BLOCKS, delta);
}
static inline int xfs_dec_frextents(struct xfs_mount *mp, uint64_t delta)
{
- return xfs_dec_freecounter(mp, &mp->m_frextents, delta, false);
+ return xfs_dec_freecounter(mp, FREE_RTEXTENTS, delta, false);
}
static inline void xfs_add_frextents(struct xfs_mount *mp, uint64_t delta)
{
- xfs_add_freecounter(mp, &mp->m_frextents, delta);
+ xfs_add_freecounter(mp, FREE_RTEXTENTS, delta);
}
extern int xfs_readsb(xfs_mount_t *, int);
spin_lock(&mp->m_sb_lock);
mp->m_sb.sb_frextents = val;
spin_unlock(&mp->m_sb_lock);
- percpu_counter_set(&mp->m_frextents, mp->m_sb.sb_frextents);
+ percpu_counter_set(&mp->m_free[FREE_RTEXTENTS], mp->m_sb.sb_frextents);
return 0;
}
icount = percpu_counter_sum(&mp->m_icount);
ifree = percpu_counter_sum(&mp->m_ifree);
- fdblocks = percpu_counter_sum(&mp->m_fdblocks);
+ fdblocks = xfs_sum_freecounter(mp, FREE_BLOCKS);
spin_lock(&mp->m_sb_lock);
statp->f_bsize = sbp->sb_blocksize;
/* make sure statp->f_bfree does not underflow */
statp->f_bfree = max_t(int64_t, 0,
- fdblocks - xfs_fdblocks_unavailable(mp));
+ fdblocks - xfs_freecounter_unavailable(mp, FREE_BLOCKS));
statp->f_bavail = statp->f_bfree;
fakeinos = XFS_FSB_TO_INO(mp, statp->f_bfree);
s64 freertx;
statp->f_blocks = sbp->sb_rblocks;
- freertx = percpu_counter_sum_positive(&mp->m_frextents);
+ freertx = max_t(int64_t, 0, xfs_sum_freecounter(mp, FREE_RTEXTENTS));
statp->f_bavail = statp->f_bfree = xfs_rtx_to_rtb(mp, freertx);
}
}
STATIC void
-xfs_save_resvblks(struct xfs_mount *mp)
+xfs_save_resvblks(
+ struct xfs_mount *mp)
{
- mp->m_resblks_save = mp->m_resblks;
- xfs_reserve_blocks(mp, 0);
+ unsigned int i;
+
+ for (i = 0; i < FREE_NR; i++) {
+ mp->m_resblks[i].save = mp->m_resblks[i].total;
+ xfs_reserve_blocks(mp, i, 0);
+ }
}
STATIC void
-xfs_restore_resvblks(struct xfs_mount *mp)
+xfs_restore_resvblks(
+ struct xfs_mount *mp)
{
- uint64_t resblks;
-
- if (mp->m_resblks_save) {
- resblks = mp->m_resblks_save;
- mp->m_resblks_save = 0;
- } else
- resblks = xfs_default_resblks(mp);
+ uint64_t resblks;
+ unsigned int i;
- xfs_reserve_blocks(mp, resblks);
+ for (i = 0; i < FREE_NR; i++) {
+ if (mp->m_resblks[i].save) {
+ resblks = mp->m_resblks[i].save;
+ mp->m_resblks[i].save = 0;
+ } else
+ resblks = xfs_default_resblks(mp);
+ xfs_reserve_blocks(mp, i, resblks);
+ }
}
/*
if (error)
goto free_icount;
- error = percpu_counter_init(&mp->m_fdblocks, 0, GFP_KERNEL);
+ error = percpu_counter_init(&mp->m_free[FREE_BLOCKS], 0, GFP_KERNEL);
if (error)
goto free_ifree;
if (error)
goto free_delalloc;
- error = percpu_counter_init(&mp->m_frextents, 0, GFP_KERNEL);
+ error = percpu_counter_init(&mp->m_free[FREE_RTEXTENTS], 0, GFP_KERNEL);
if (error)
goto free_delalloc_rt;
free_delalloc:
percpu_counter_destroy(&mp->m_delalloc_blks);
free_fdblocks:
- percpu_counter_destroy(&mp->m_fdblocks);
+ percpu_counter_destroy(&mp->m_free[FREE_BLOCKS]);
free_ifree:
percpu_counter_destroy(&mp->m_ifree);
free_icount:
{
percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount);
percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree);
- percpu_counter_set(&mp->m_fdblocks, mp->m_sb.sb_fdblocks);
- percpu_counter_set(&mp->m_frextents, mp->m_sb.sb_frextents);
+ percpu_counter_set(&mp->m_free[FREE_BLOCKS], mp->m_sb.sb_fdblocks);
+ percpu_counter_set(&mp->m_free[FREE_RTEXTENTS], mp->m_sb.sb_frextents);
}
static void
{
percpu_counter_destroy(&mp->m_icount);
percpu_counter_destroy(&mp->m_ifree);
- percpu_counter_destroy(&mp->m_fdblocks);
+ percpu_counter_destroy(&mp->m_free[FREE_BLOCKS]);
ASSERT(xfs_is_shutdown(mp) ||
percpu_counter_sum(&mp->m_delalloc_rtextents) == 0);
percpu_counter_destroy(&mp->m_delalloc_rtextents);
ASSERT(xfs_is_shutdown(mp) ||
percpu_counter_sum(&mp->m_delalloc_blks) == 0);
percpu_counter_destroy(&mp->m_delalloc_blks);
- percpu_counter_destroy(&mp->m_frextents);
+ percpu_counter_destroy(&mp->m_free[FREE_RTEXTENTS]);
}
static int
__entry->dev = mp->m_super->s_dev;
__entry->ino = ip->i_ino;
- __entry->freeblks = percpu_counter_sum(&mp->m_fdblocks);
+ __entry->freeblks = xfs_sum_freecounter(mp, FREE_BLOCKS);
__entry->reserved = ip->i_delayed_blks;
__entry->asked = ip->i_meta_resv_asked;
__entry->used = ip->i_nblocks;
* update the incore superblock so that future calls to xfs_log_sb
* write the correct value ondisk.
*
- * Don't touch m_frextents because it includes incore reservations,
- * and those are handled by the unreserve function.
+ * Don't touch m_free[FREE_RTEXTENTS] because it includes incore
+ * reservations, and those are handled by the unreserve function.
*
* sb_frextents was added to the lazy sb counters when the rt groups
* feature was introduced. This is possible because we know that all