* that we can immediately allocate, but then we allow allocation on the
* second pass if we fail to find an AG with free inodes in it.
*/
- if (percpu_counter_read_positive(&mp->m_fdblocks) <
+ if (xfs_estimate_freecounter(mp, FREE_BLOCKS) <
mp->m_low_space[XFS_LOWSP_1_PCNT]) {
ok_alloc = false;
low_space = true;
* There aren't enough blocks left in the inode's reservation, but it
* isn't critical unless there also isn't enough free space.
*/
- return __percpu_counter_compare(&ip->i_mount->m_fdblocks,
+ return xfs_compare_freecounter(ip->i_mount, FREE_BLOCKS,
rhs - ip->i_delayed_blks, 2048) >= 0;
}
mp->m_sb.sb_ifree = min_t(uint64_t,
percpu_counter_sum_positive(&mp->m_ifree),
mp->m_sb.sb_icount);
- mp->m_sb.sb_fdblocks =
- percpu_counter_sum_positive(&mp->m_fdblocks);
+ mp->m_sb.sb_fdblocks = xfs_sum_freecounter(mp, FREE_BLOCKS);
}
if (xfs_has_rtgroups(mp))
- mp->m_sb.sb_frextents =
- percpu_counter_sum_positive(&mp->m_frextents);
+ mp->m_sb.sb_frextents = xfs_sum_freecounter(mp, FREE_RTEXTENTS);
xfs_sb_to_disk(bp->b_addr, &mp->m_sb);
xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF);
* The global incore space reservation is taken from the incore
* counters, so leave that out of the computation.
*/
- fsc->fdblocks -= mp->m_resblks_avail;
+ fsc->fdblocks -= mp->m_resblks[FREE_BLOCKS].avail;
/*
* Delayed allocation reservations are taken out of the incore counters
/* Snapshot the percpu counters. */
icount = percpu_counter_sum(&mp->m_icount);
ifree = percpu_counter_sum(&mp->m_ifree);
- fdblocks = percpu_counter_sum(&mp->m_fdblocks);
- frextents = percpu_counter_sum(&mp->m_frextents);
+ fdblocks = xfs_sum_freecounter(mp, FREE_BLOCKS);
+ frextents = xfs_sum_freecounter(mp, FREE_RTEXTENTS);
/* No negative values, please! */
if (icount < 0 || ifree < 0)
try_again = true;
}
- if (!xchk_fscount_within_range(sc, fdblocks, &mp->m_fdblocks,
+ if (!xchk_fscount_within_range(sc, fdblocks, &mp->m_free[FREE_BLOCKS],
fsc->fdblocks)) {
if (fsc->frozen)
xchk_set_corrupt(sc);
try_again = true;
}
- if (!xchk_fscount_within_range(sc, frextents, &mp->m_frextents,
+ if (!xchk_fscount_within_range(sc, frextents,
+ &mp->m_free[FREE_RTEXTENTS],
fsc->frextents - fsc->frextents_delayed)) {
if (fsc->frozen)
xchk_set_corrupt(sc);
percpu_counter_set(&mp->m_icount, fsc->icount);
percpu_counter_set(&mp->m_ifree, fsc->ifree);
- percpu_counter_set(&mp->m_fdblocks, fsc->fdblocks);
+ percpu_counter_set(&mp->m_free[FREE_BLOCKS], fsc->fdblocks);
/*
* Online repair is only supported on v5 file systems, which require
* track of the delalloc reservations separately, as they are
* subtracted from m_frextents, but not included in sb_frextents.
*/
- percpu_counter_set(&mp->m_frextents,
+ percpu_counter_set(&mp->m_free[FREE_RTEXTENTS],
fsc->frextents - fsc->frextents_delayed);
if (!xfs_has_rtgroups(mp))
mp->m_sb.sb_frextents = fsc->frextents;
free = sc->sa.pag->pagf_freeblks;
sz = xfs_ag_block_count(sc->mp, sc->sa.pag->pag_agno);
} else {
- free = percpu_counter_sum(&sc->mp->m_fdblocks);
+ free = xfs_sum_freecounter(sc->mp, FREE_BLOCKS);
sz = sc->mp->m_sb.sb_dblocks;
}
int
xfs_reserve_blocks(
struct xfs_mount *mp,
+ unsigned int idx,
uint64_t request)
{
int64_t lcounter, delta;
int64_t free;
int error = 0;
+ ASSERT(idx < FREE_NR);
+
/*
* With per-cpu counters, this becomes an interesting problem. We need
* to work out if we are freeing or allocating blocks first, then we can
* counters directly since we shouldn't have any problems unreserving
* space.
*/
- if (mp->m_resblks > request) {
- lcounter = mp->m_resblks_avail - request;
+ if (mp->m_resblks[idx].total > request) {
+ lcounter = mp->m_resblks[idx].avail - request;
if (lcounter > 0) { /* release unused blocks */
fdblks_delta = lcounter;
- mp->m_resblks_avail -= lcounter;
+ mp->m_resblks[idx].avail -= lcounter;
}
- mp->m_resblks = request;
+ mp->m_resblks[idx].total = request;
if (fdblks_delta) {
spin_unlock(&mp->m_sb_lock);
- xfs_add_fdblocks(mp, fdblks_delta);
+ xfs_add_freecounter(mp, idx, fdblks_delta);
spin_lock(&mp->m_sb_lock);
}
/*
* If the request is larger than the current reservation, reserve the
- * blocks before we update the reserve counters. Sample m_fdblocks and
+ * blocks before we update the reserve counters. Sample m_free and
* perform a partial reservation if the request exceeds free space.
*
* The code below estimates how many blocks it can request from
- * space to fill it because mod_fdblocks will refill an undersized
- * reserve when it can.
+ * space to fill it because xfs_add_freecounter will refill an undersized
+ * reserve when it can.
*/
- free = percpu_counter_sum(&mp->m_fdblocks) -
- xfs_fdblocks_unavailable(mp);
- delta = request - mp->m_resblks;
- mp->m_resblks = request;
+ free = xfs_sum_freecounter(mp, idx) -
+ xfs_freecounter_unavailable(mp, idx);
+ delta = request - mp->m_resblks[idx].total;
+ mp->m_resblks[idx].total = request;
if (delta > 0 && free > 0) {
/*
* We'll either succeed in getting space from the free block
*/
fdblks_delta = min(free, delta);
spin_unlock(&mp->m_sb_lock);
- error = xfs_dec_fdblocks(mp, fdblks_delta, 0);
+ error = xfs_dec_freecounter(mp, idx, fdblks_delta, 0);
if (!error)
- xfs_add_fdblocks(mp, fdblks_delta);
+ xfs_add_freecounter(mp, idx, fdblks_delta);
spin_lock(&mp->m_sb_lock);
}
out:
int xfs_growfs_data(struct xfs_mount *mp, struct xfs_growfs_data *in);
int xfs_growfs_log(struct xfs_mount *mp, struct xfs_growfs_log *in);
-int xfs_reserve_blocks(struct xfs_mount *mp, uint64_t request);
+int xfs_reserve_blocks(struct xfs_mount *mp, unsigned int idx,
+ uint64_t request);
int xfs_fs_goingdown(struct xfs_mount *mp, uint32_t inflags);
int xfs_fs_reserve_ag_blocks(struct xfs_mount *mp);
if (!XFS_IS_REALTIME_INODE(ip))
return false;
- if (__percpu_counter_compare(&mp->m_frextents,
+ if (xfs_compare_freecounter(mp, FREE_RTEXTENTS,
mp->m_low_rtexts[XFS_LOWSP_5_PCNT],
XFS_FDBLOCKS_BATCH) < 0)
return true;
if (items > mp->m_ino_geo.inodes_per_cluster)
return true;
- if (__percpu_counter_compare(&mp->m_fdblocks,
+ if (xfs_compare_freecounter(mp, FREE_BLOCKS,
mp->m_low_space[XFS_LOWSP_5_PCNT],
XFS_FDBLOCKS_BATCH) < 0)
return true;
error = mnt_want_write_file(filp);
if (error)
return error;
- error = xfs_reserve_blocks(mp, fsop.resblks);
+ error = xfs_reserve_blocks(mp, FREE_BLOCKS, fsop.resblks);
mnt_drop_write_file(filp);
if (error)
return error;
}
spin_lock(&mp->m_sb_lock);
- fsop.resblks = mp->m_resblks;
- fsop.resblks_avail = mp->m_resblks_avail;
+ fsop.resblks = mp->m_resblks[FREE_BLOCKS].total;
+ fsop.resblks_avail = mp->m_resblks[FREE_BLOCKS].avail;
spin_unlock(&mp->m_sb_lock);
if (copy_to_user(arg, &fsop, sizeof(fsop)))
struct xfs_fsop_counts out = {
.allocino = percpu_counter_read_positive(&mp->m_icount),
.freeino = percpu_counter_read_positive(&mp->m_ifree),
- .freedata = percpu_counter_read_positive(&mp->m_fdblocks) -
- xfs_fdblocks_unavailable(mp),
- .freertx = percpu_counter_read_positive(&mp->m_frextents),
+ .freedata = xfs_estimate_freecounter(mp, FREE_BLOCKS) -
+ xfs_freecounter_unavailable(mp, FREE_BLOCKS),
+ .freertx = xfs_estimate_freecounter(mp, FREE_RTEXTENTS),
};
if (copy_to_user(uarg, &out, sizeof(out)))
static int64_t
xfs_iomap_freesp(
- struct percpu_counter *counter,
+ struct xfs_mount *mp,
+ unsigned int idx,
uint64_t low_space[XFS_LOWSP_MAX],
int *shift)
{
int64_t freesp;
- freesp = percpu_counter_read_positive(counter);
+ freesp = xfs_estimate_freecounter(mp, idx);
if (freesp < low_space[XFS_LOWSP_5_PCNT]) {
*shift = 2;
if (freesp < low_space[XFS_LOWSP_4_PCNT])
if (unlikely(XFS_IS_REALTIME_INODE(ip)))
freesp = xfs_rtx_to_rtb(mp,
- xfs_iomap_freesp(&mp->m_frextents,
+ xfs_iomap_freesp(mp, FREE_RTEXTENTS,
mp->m_low_rtexts, &shift));
else
- freesp = xfs_iomap_freesp(&mp->m_fdblocks, mp->m_low_space,
+ freesp = xfs_iomap_freesp(mp, FREE_BLOCKS, mp->m_low_space,
&shift);
/*
* we were already there on the last unmount. Warn if this occurs.
*/
if (!xfs_is_readonly(mp)) {
- error = xfs_reserve_blocks(mp, xfs_default_resblks(mp));
+ error = xfs_reserve_blocks(mp, FREE_BLOCKS,
+ xfs_default_resblks(mp));
if (error)
xfs_warn(mp,
"Unable to allocate reserve blocks. Continuing without reserve pool.");
* we only ever apply deltas to the superblock and hence the incore
* value does not matter....
*/
- error = xfs_reserve_blocks(mp, 0);
+ error = xfs_reserve_blocks(mp, FREE_BLOCKS, 0);
if (error)
xfs_warn(mp, "Unable to free reserved block pool. "
"Freespace may not be correct on next mount.");
return true;
}
+/*
+ * Estimate the amount of free space that is not available to userspace and is
+ * not explicitly reserved from the incore fdblocks. This includes:
+ *
+ * - The minimum number of blocks needed to support splitting a bmap btree
+ * - The blocks currently in use by the freespace btrees because they record
+ * the actual blocks that will fill per-AG metadata space reservations
+ */
+uint64_t
+xfs_freecounter_unavailable(
+ struct xfs_mount *mp,
+ unsigned int idx)
+{
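+	/* Nothing is set aside from the free rt extent count. */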
+ if (idx == FREE_RTEXTENTS)
+ return 0;
+ return mp->m_alloc_set_aside + atomic64_read(&mp->m_allocbt_blks);
+}
+
void
xfs_add_freecounter(
struct xfs_mount *mp,
- struct percpu_counter *counter,
+ unsigned int idx,
uint64_t delta)
{
- bool has_resv_pool = (counter == &mp->m_fdblocks);
uint64_t res_used;
/*
* If the reserve pool is depleted, put blocks back into it first.
* Most of the time the pool is full.
*/
- if (!has_resv_pool || mp->m_resblks == mp->m_resblks_avail) {
- percpu_counter_add(counter, delta);
+ if (likely(mp->m_resblks[idx].total == mp->m_resblks[idx].avail)) {
+ percpu_counter_add(&mp->m_free[idx], delta);
return;
}
spin_lock(&mp->m_sb_lock);
- res_used = mp->m_resblks - mp->m_resblks_avail;
+ res_used = mp->m_resblks[idx].total - mp->m_resblks[idx].avail;
if (res_used > delta) {
- mp->m_resblks_avail += delta;
+ mp->m_resblks[idx].avail += delta;
} else {
delta -= res_used;
- mp->m_resblks_avail = mp->m_resblks;
- percpu_counter_add(counter, delta);
+ mp->m_resblks[idx].avail = mp->m_resblks[idx].total;
+ percpu_counter_add(&mp->m_free[idx], delta);
}
spin_unlock(&mp->m_sb_lock);
}
+
+/*
+ * Decrement in-core free blocks or RT extents, dipping into the reserve pool
+ * if @rsvd is set.
+ */
int
xfs_dec_freecounter(
struct xfs_mount *mp,
- struct percpu_counter *counter,
+ unsigned int idx,
uint64_t delta,
bool rsvd)
{
+ struct percpu_counter *counter = &mp->m_free[idx];
int64_t lcounter;
uint64_t set_aside = 0;
s32 batch;
- bool has_resv_pool;
- ASSERT(counter == &mp->m_fdblocks || counter == &mp->m_frextents);
- has_resv_pool = (counter == &mp->m_fdblocks);
- if (rsvd)
- ASSERT(has_resv_pool);
+ ASSERT(idx < FREE_NR);
/*
* Taking blocks away, need to be more accurate the closer we
* problems (i.e. transaction abort, pagecache discards, etc.) than
* slightly premature -ENOSPC.
*/
- if (has_resv_pool)
- set_aside = xfs_fdblocks_unavailable(mp);
+ set_aside = xfs_freecounter_unavailable(mp, idx);
percpu_counter_add_batch(counter, -((int64_t)delta), batch);
if (__percpu_counter_compare(counter, set_aside,
XFS_FDBLOCKS_BATCH) >= 0) {
*/
spin_lock(&mp->m_sb_lock);
percpu_counter_add(counter, delta);
- if (!has_resv_pool || !rsvd)
+ if (!rsvd)
goto fdblocks_enospc;
- lcounter = (long long)mp->m_resblks_avail - delta;
+ lcounter = (long long)mp->m_resblks[idx].avail - delta;
if (lcounter >= 0) {
- mp->m_resblks_avail = lcounter;
+ mp->m_resblks[idx].avail = lcounter;
spin_unlock(&mp->m_sb_lock);
return 0;
}
spinlock_t ____cacheline_aligned m_sb_lock; /* sb counter lock */
struct percpu_counter m_icount; /* allocated inodes counter */
struct percpu_counter m_ifree; /* free inodes counter */
- struct percpu_counter m_fdblocks; /* free block counter */
- struct percpu_counter m_frextents; /* free rt extent counter */
+
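+/* Indices into the m_free[] array of free space counters. */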
+#define FREE_BLOCKS 0 /* free block counter */
+#define FREE_RTEXTENTS 1 /* free rt extent counter */
+#define FREE_NR 2
+ struct percpu_counter m_free[FREE_NR];
/*
* Count of data device blocks reserved for delayed allocations,
struct xarray m_rtgroups; /* per-rt group info */
struct radix_tree_root m_perag_tree; /* per-ag accounting info */
spinlock_t m_perag_lock; /* lock for m_perag_tree */
- uint64_t m_resblks; /* total reserved blocks */
- uint64_t m_resblks_avail;/* available reserved blocks */
- uint64_t m_resblks_save; /* reserved blks @ remount,ro */
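+	/* Reserve pool for each free space counter, protected by m_sb_lock. */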
+ struct {
+ uint64_t total; /* total reserved blocks */
+ uint64_t avail; /* available reserved blocks */
+ uint64_t save; /* reserved blks @ remount,ro */
+ } m_resblks[FREE_NR];
struct delayed_work m_reclaim_work; /* background inode reclaim */
struct dentry *m_debugfs; /* debugfs parent */
struct xfs_kobj m_kobj;
*/
#define XFS_FDBLOCKS_BATCH 1024
+uint64_t xfs_freecounter_unavailable(struct xfs_mount *mp, unsigned int idx);
+
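+/*
+ * Exact sum of the free counter, including all per-cpu deltas.  The result
+ * can be (transiently) negative.
+ */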
+static inline s64 xfs_sum_freecounter(struct xfs_mount *mp, unsigned int idx)
+{
+ return percpu_counter_sum(&mp->m_free[idx]);
+}
+
/*
- * Estimate the amount of free space that is not available to userspace and is
- * not explicitly reserved from the incore fdblocks. This includes:
- *
- * - The minimum number of blocks needed to support splitting a bmap btree
- * - The blocks currently in use by the freespace btrees because they record
- * the actual blocks that will fill per-AG metadata space reservations
+ * This just provides an estimate without the cpu-local updates; use
+ * xfs_sum_freecounter() for the exact value.
*/
-static inline uint64_t
-xfs_fdblocks_unavailable(
- struct xfs_mount *mp)
+static inline s64 xfs_estimate_freecounter(struct xfs_mount *mp,
+ unsigned int idx)
+{
+ return percpu_counter_read_positive(&mp->m_free[idx]);
+}
+
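+/*
+ * Compare the free counter against @rhs, returning -1, 0 or 1.  This only
+ * does the expensive exact sum when the fast per-cpu estimate is too close
+ * to @rhs to be trusted for the given batch size.
+ */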
+static inline int xfs_compare_freecounter(struct xfs_mount *mp,
+ unsigned int idx, s64 rhs, s32 batch)
{
- return mp->m_alloc_set_aside + atomic64_read(&mp->m_allocbt_blks);
+ return __percpu_counter_compare(&mp->m_free[idx], rhs, batch);
}
-int xfs_dec_freecounter(struct xfs_mount *mp, struct percpu_counter *counter,
- uint64_t delta, bool rsvd);
-void xfs_add_freecounter(struct xfs_mount *mp, struct percpu_counter *counter,
+int xfs_dec_freecounter(struct xfs_mount *mp, unsigned int idx, uint64_t delta,
+ bool rsvd);
+void xfs_add_freecounter(struct xfs_mount *mp, unsigned int idx,
uint64_t delta);
static inline int xfs_dec_fdblocks(struct xfs_mount *mp, uint64_t delta,
bool reserved)
{
- return xfs_dec_freecounter(mp, &mp->m_fdblocks, delta, reserved);
+ return xfs_dec_freecounter(mp, FREE_BLOCKS, delta, reserved);
}
static inline void xfs_add_fdblocks(struct xfs_mount *mp, uint64_t delta)
{
- xfs_add_freecounter(mp, &mp->m_fdblocks, delta);
+ xfs_add_freecounter(mp, FREE_BLOCKS, delta);
}
static inline int xfs_dec_frextents(struct xfs_mount *mp, uint64_t delta)
{
- return xfs_dec_freecounter(mp, &mp->m_frextents, delta, false);
+ return xfs_dec_freecounter(mp, FREE_RTEXTENTS, delta, false);
}
static inline void xfs_add_frextents(struct xfs_mount *mp, uint64_t delta)
{
- xfs_add_freecounter(mp, &mp->m_frextents, delta);
+ xfs_add_freecounter(mp, FREE_RTEXTENTS, delta);
}
extern int xfs_readsb(xfs_mount_t *, int);
spin_lock(&mp->m_sb_lock);
mp->m_sb.sb_frextents = val;
spin_unlock(&mp->m_sb_lock);
- percpu_counter_set(&mp->m_frextents, mp->m_sb.sb_frextents);
+ percpu_counter_set(&mp->m_free[FREE_RTEXTENTS], mp->m_sb.sb_frextents);
return 0;
}
icount = percpu_counter_sum(&mp->m_icount);
ifree = percpu_counter_sum(&mp->m_ifree);
- fdblocks = percpu_counter_sum(&mp->m_fdblocks);
+ fdblocks = xfs_sum_freecounter(mp, FREE_BLOCKS);
spin_lock(&mp->m_sb_lock);
statp->f_bsize = sbp->sb_blocksize;
/* make sure statp->f_bfree does not underflow */
statp->f_bfree = max_t(int64_t, 0,
- fdblocks - xfs_fdblocks_unavailable(mp));
+ fdblocks - xfs_freecounter_unavailable(mp, FREE_BLOCKS));
statp->f_bavail = statp->f_bfree;
fakeinos = XFS_FSB_TO_INO(mp, statp->f_bfree);
s64 freertx;
statp->f_blocks = sbp->sb_rblocks;
- freertx = percpu_counter_sum_positive(&mp->m_frextents);
+ freertx = max_t(int64_t, 0, xfs_sum_freecounter(mp, FREE_RTEXTENTS));
statp->f_bavail = statp->f_bfree = xfs_rtx_to_rtb(mp, freertx);
}
}
STATIC void
-xfs_save_resvblks(struct xfs_mount *mp)
+xfs_save_resvblks(
+ struct xfs_mount *mp)
{
- mp->m_resblks_save = mp->m_resblks;
- xfs_reserve_blocks(mp, 0);
+ unsigned int i;
+
+ for (i = 0; i < FREE_NR; i++) {
+ mp->m_resblks[i].save = mp->m_resblks[i].total;
+ xfs_reserve_blocks(mp, i, 0);
+ }
}
STATIC void
-xfs_restore_resvblks(struct xfs_mount *mp)
+xfs_restore_resvblks(
+ struct xfs_mount *mp)
{
- uint64_t resblks;
-
- if (mp->m_resblks_save) {
- resblks = mp->m_resblks_save;
- mp->m_resblks_save = 0;
- } else
- resblks = xfs_default_resblks(mp);
+ uint64_t resblks;
+ unsigned int i;
- xfs_reserve_blocks(mp, resblks);
+ for (i = 0; i < FREE_NR; i++) {
+ if (mp->m_resblks[i].save) {
+ resblks = mp->m_resblks[i].save;
+ mp->m_resblks[i].save = 0;
+ } else
+ resblks = xfs_default_resblks(mp);
+ xfs_reserve_blocks(mp, i, resblks);
+ }
}
/*
if (error)
goto free_icount;
- error = percpu_counter_init(&mp->m_fdblocks, 0, GFP_KERNEL);
+ error = percpu_counter_init(&mp->m_free[FREE_BLOCKS], 0, GFP_KERNEL);
if (error)
goto free_ifree;
if (error)
goto free_delalloc;
- error = percpu_counter_init(&mp->m_frextents, 0, GFP_KERNEL);
+ error = percpu_counter_init(&mp->m_free[FREE_RTEXTENTS], 0, GFP_KERNEL);
if (error)
goto free_delalloc_rt;
free_delalloc:
percpu_counter_destroy(&mp->m_delalloc_blks);
free_fdblocks:
- percpu_counter_destroy(&mp->m_fdblocks);
+ percpu_counter_destroy(&mp->m_free[FREE_BLOCKS]);
free_ifree:
percpu_counter_destroy(&mp->m_ifree);
free_icount:
{
percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount);
percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree);
- percpu_counter_set(&mp->m_fdblocks, mp->m_sb.sb_fdblocks);
- percpu_counter_set(&mp->m_frextents, mp->m_sb.sb_frextents);
+ percpu_counter_set(&mp->m_free[FREE_BLOCKS], mp->m_sb.sb_fdblocks);
+ percpu_counter_set(&mp->m_free[FREE_RTEXTENTS], mp->m_sb.sb_frextents);
}
static void
{
percpu_counter_destroy(&mp->m_icount);
percpu_counter_destroy(&mp->m_ifree);
- percpu_counter_destroy(&mp->m_fdblocks);
+ percpu_counter_destroy(&mp->m_free[FREE_BLOCKS]);
ASSERT(xfs_is_shutdown(mp) ||
percpu_counter_sum(&mp->m_delalloc_rtextents) == 0);
percpu_counter_destroy(&mp->m_delalloc_rtextents);
ASSERT(xfs_is_shutdown(mp) ||
percpu_counter_sum(&mp->m_delalloc_blks) == 0);
percpu_counter_destroy(&mp->m_delalloc_blks);
- percpu_counter_destroy(&mp->m_frextents);
+ percpu_counter_destroy(&mp->m_free[FREE_RTEXTENTS]);
}
static int
__entry->dev = mp->m_super->s_dev;
__entry->ino = ip->i_ino;
- __entry->freeblks = percpu_counter_sum(&mp->m_fdblocks);
+ __entry->freeblks = xfs_sum_freecounter(mp, FREE_BLOCKS);
__entry->reserved = ip->i_delayed_blks;
__entry->asked = ip->i_meta_resv_asked;
__entry->used = ip->i_nblocks;
* update the incore superblock so that future calls to xfs_log_sb
* write the correct value ondisk.
*
- * Don't touch m_frextents because it includes incore reservations,
- * and those are handled by the unreserve function.
+ * Don't touch m_free[FREE_RTEXTENTS] because it includes incore
+ * reservations, and those are handled by the unreserve function.
*
* sb_frextents was added to the lazy sb counters when the rt groups
* feature was introduced. This is possible because we know that all