www.infradead.org Git - users/dwmw2/linux.git/commitdiff
gfs2: Add per-reservation reserved block accounting
author: Andreas Gruenbacher <agruenba@redhat.com>
Tue, 2 Oct 2018 13:59:54 +0000 (14:59 +0100)
committer: Andreas Gruenbacher <agruenba@redhat.com>
Wed, 17 Feb 2021 18:30:26 +0000 (19:30 +0100)
Add an rs_reserved field to struct gfs2_blkreserv to keep track of the number of
blocks reserved by this particular reservation, and an rd_reserved field to
struct gfs2_rgrpd to keep track of the total number of reserved blocks in the
resource group.  Those blocks are exclusively reserved, as opposed to the
rs_requested / rd_requested blocks which are tracked in the reservation tree
(rd_rstree) and which can be stolen if necessary.

When making a reservation with gfs2_inplace_reserve, rs_reserved is set to
somewhere between ap->min_target and ap->target depending on the number of free
blocks in the resource group.  When allocating blocks with gfs2_alloc_blocks,
rs_reserved is decremented accordingly.  Eventually, any reserved but not
consumed blocks are returned to the resource group by gfs2_inplace_release.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
fs/gfs2/file.c
fs/gfs2/incore.h
fs/gfs2/lops.c
fs/gfs2/rgrp.c
fs/gfs2/trace_gfs2.h

index 177c4d74ca3037cb9457a08fe48bc1837d05bc77..294087516ce0e13b4bf4e30e56ee2ae0ebb24da3 100644 (file)
@@ -1115,8 +1115,8 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
                        goto out_qunlock;
 
                /* check if the selected rgrp limits our max_blks further */
-               if (ap.allowed && ap.allowed < max_blks)
-                       max_blks = ap.allowed;
+               if (ip->i_res.rs_reserved < max_blks)
+                       max_blks = ip->i_res.rs_reserved;
 
                /* Almost done. Calculate bytes that can be written using
                 * max_blks. We also recompute max_bytes, data_blocks and
index 0640d0c70a75e67439b441d9d61d6846725d3542..2679ba54798c8566fd777a0cd1e8ba136cac4f56 100644 (file)
@@ -107,6 +107,7 @@ struct gfs2_rgrpd {
        u32 rd_bitbytes;                /* number of bytes in data bitmaps */
        u32 rd_free;
        u32 rd_requested;               /* number of blocks in rd_rstree */
+       u32 rd_reserved;                /* number of reserved blocks */
        u32 rd_free_clone;
        u32 rd_dinodes;
        u64 rd_igeneration;
@@ -292,6 +293,7 @@ struct gfs2_blkreserv {
        struct gfs2_rgrpd *rs_rgd;
        u64 rs_start;
        u32 rs_requested;
+       u32 rs_reserved;              /* number of reserved blocks */
 };
 
 /*
index 3922b26264f5a87cec611ab2e6b01cf4827cde44..802bc15f9f11a10a455d2a572e0d0049da13304b 100644 (file)
@@ -84,6 +84,7 @@ static void maybe_release_space(struct gfs2_bufdata *bd)
               bd->bd_bh->b_data + bi->bi_offset, bi->bi_bytes);
        clear_bit(GBF_FULL, &bi->bi_flags);
        rgd->rd_free_clone = rgd->rd_free;
+       BUG_ON(rgd->rd_free_clone < rgd->rd_reserved);
        rgd->rd_extfail_pt = rgd->rd_free;
 }
 
index bc8d1ab9e07f102d60a40fc6cf24fb32e6302f00..f1df5e75364ab8be97d1ddad2794be96fa9448e5 100644 (file)
@@ -1229,6 +1229,7 @@ static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
                rgrp_set_bitmap_flags(rgd);
                rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
                rgd->rd_free_clone = rgd->rd_free;
+               BUG_ON(rgd->rd_reserved);
                /* max out the rgrp allocation failure point */
                rgd->rd_extfail_pt = rgd->rd_free;
        }
@@ -1278,6 +1279,7 @@ static int update_rgrp_lvb(struct gfs2_rgrpd *rgd)
        rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free);
        rgrp_set_bitmap_flags(rgd);
        rgd->rd_free_clone = rgd->rd_free;
+       BUG_ON(rgd->rd_reserved);
        /* max out the rgrp allocation failure point */
        rgd->rd_extfail_pt = rgd->rd_free;
        rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes);
@@ -1568,17 +1570,26 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
        u64 goal;
        struct gfs2_blkreserv *rs = &ip->i_res;
        u32 extlen;
-       u32 free_blocks = rgd_free(rgd, rs);
+       u32 free_blocks, blocks_available;
        int ret;
        struct inode *inode = &ip->i_inode;
 
+       spin_lock(&rgd->rd_rsspin);
+       free_blocks = rgd_free(rgd, rs);
+       if (rgd->rd_free_clone < rgd->rd_requested)
+               free_blocks = 0;
+       blocks_available = rgd->rd_free_clone - rgd->rd_reserved;
+       if (rgd == rs->rs_rgd)
+               blocks_available += rs->rs_reserved;
+       spin_unlock(&rgd->rd_rsspin);
+
        if (S_ISDIR(inode->i_mode))
                extlen = 1;
        else {
                extlen = max_t(u32, atomic_read(&ip->i_sizehint), ap->target);
                extlen = clamp(extlen, (u32)RGRP_RSRV_MINBLKS, free_blocks);
        }
-       if ((rgd->rd_free_clone < rgd->rd_requested) || (free_blocks < extlen))
+       if (free_blocks < extlen || blocks_available < extlen)
                return;
 
        /* Find bitmap block that contains bits for goal block */
@@ -2027,8 +2038,7 @@ static inline int fast_to_acquire(struct gfs2_rgrpd *rgd)
  * We try our best to find an rgrp that has at least ap->target blocks
  * available. After a couple of passes (loops == 2), the prospects of finding
  * such an rgrp diminish. At this stage, we return the first rgrp that has
- * at least ap->min_target blocks available. Either way, we set ap->allowed to
- * the number of blocks available in the chosen rgrp.
+ * at least ap->min_target blocks available.
  *
  * Returns: 0 on success,
  *          -ENOMEM if a suitable rgrp can't be found
@@ -2044,7 +2054,9 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
        u64 last_unlinked = NO_BLOCK;
        u32 target = ap->target;
        int loops = 0;
-       u32 free_blocks, skip = 0;
+       u32 free_blocks, blocks_available, skip = 0;
+
+       BUG_ON(rs->rs_reserved);
 
        if (sdp->sd_args.ar_rgrplvb)
                flags |= GL_SKIP;
@@ -2065,6 +2077,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
                return -EBADSLT;
 
        while (loops < 3) {
+               struct gfs2_rgrpd *rgd;
+
                rg_locked = 1;
 
                if (!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl)) {
@@ -2115,11 +2129,20 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
                        goto check_rgrp;
 
                /* If rgrp has enough free space, use it */
-               free_blocks = rgd_free(rs->rs_rgd, rs);
-               if (free_blocks >= target) {
-                       ap->allowed = free_blocks;
-                       return 0;
+               rgd = rs->rs_rgd;
+               spin_lock(&rgd->rd_rsspin);
+               free_blocks = rgd_free(rgd, rs);
+               blocks_available = rgd->rd_free_clone - rgd->rd_reserved;
+               if (free_blocks < target || blocks_available < target) {
+                       spin_unlock(&rgd->rd_rsspin);
+                       goto check_rgrp;
                }
+               rs->rs_reserved = ap->target;
+               if (rs->rs_reserved > blocks_available)
+                       rs->rs_reserved = blocks_available;
+               rgd->rd_reserved += rs->rs_reserved;
+               spin_unlock(&rgd->rd_rsspin);
+               return 0;
 check_rgrp:
                /* Check for unlinked inodes which can be reclaimed */
                if (rs->rs_rgd->rd_flags & GFS2_RDF_CHECK)
@@ -2172,6 +2195,17 @@ next_rgrp:
 
 void gfs2_inplace_release(struct gfs2_inode *ip)
 {
+       struct gfs2_blkreserv *rs = &ip->i_res;
+
+       if (rs->rs_reserved) {
+               struct gfs2_rgrpd *rgd = rs->rs_rgd;
+
+               spin_lock(&rgd->rd_rsspin);
+               BUG_ON(rgd->rd_reserved < rs->rs_reserved);
+               rgd->rd_reserved -= rs->rs_reserved;
+               spin_unlock(&rgd->rd_rsspin);
+               rs->rs_reserved = 0;
+       }
        if (gfs2_holder_initialized(&ip->i_rgd_gh))
                gfs2_glock_dq_uninit(&ip->i_rgd_gh);
 }
@@ -2259,11 +2293,11 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd,
        struct gfs2_blkreserv *trs;
        const struct rb_node *n;
 
-       gfs2_print_dbg(seq, "%s R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n",
+       gfs2_print_dbg(seq, "%s R: n:%llu f:%02x b:%u/%u i:%u q:%u r:%u e:%u\n",
                       fs_id_buf,
                       (unsigned long long)rgd->rd_addr, rgd->rd_flags,
                       rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes,
-                      rgd->rd_requested, rgd->rd_extfail_pt);
+                      rgd->rd_requested, rgd->rd_reserved, rgd->rd_extfail_pt);
        if (rgd->rd_sbd->sd_args.ar_rgrplvb) {
                struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
 
@@ -2310,7 +2344,8 @@ static void gfs2_adjust_reservation(struct gfs2_inode *ip,
        struct gfs2_blkreserv *rs = &ip->i_res;
        struct gfs2_rgrpd *rgd = rbm->rgd;
 
-       spin_lock(&rgd->rd_rsspin);
+       BUG_ON(rs->rs_reserved < len);
+       rs->rs_reserved -= len;
        if (gfs2_rs_active(rs)) {
                u64 start = gfs2_rbm_to_block(rbm);
 
@@ -2324,15 +2359,13 @@ static void gfs2_adjust_reservation(struct gfs2_inode *ip,
                        trace_gfs2_rs(rs, TRACE_RS_CLAIM);
                        if (rs->rs_start < rgd->rd_data0 + rgd->rd_data &&
                            rs->rs_requested)
-                               goto out;
+                               return;
                        /* We used up our block reservation, so we should
                           reserve more blocks next time. */
                        atomic_add(RGRP_RSRV_ADDBLKS, &ip->i_sizehint);
                }
                __rs_deltree(rs);
        }
-out:
-       spin_unlock(&rgd->rd_rsspin);
 }
 
 /**
@@ -2386,6 +2419,8 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
        u32 minext = 1;
        int error = -ENOSPC;
 
+       BUG_ON(ip->i_res.rs_reserved < *nblocks);
+
        if (gfs2_rs_active(&ip->i_res)) {
                gfs2_set_alloc_start(&rbm, ip, dinode);
                error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &minext, &ip->i_res, false);
@@ -2407,8 +2442,6 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
        gfs2_alloc_extent(&rbm, dinode, nblocks);
        block = gfs2_rbm_to_block(&rbm);
        rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0;
-       if (gfs2_rs_active(&ip->i_res))
-               gfs2_adjust_reservation(ip, &rbm, *nblocks);
        if (!dinode) {
                ip->i_goal = block + *nblocks - 1;
                error = gfs2_meta_inode_buffer(ip, &dibh);
@@ -2421,12 +2454,20 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
                        brelse(dibh);
                }
        }
-       if (rbm.rgd->rd_free < *nblocks) {
+       spin_lock(&rbm.rgd->rd_rsspin);
+       gfs2_adjust_reservation(ip, &rbm, *nblocks);
+       if (rbm.rgd->rd_free < *nblocks || rbm.rgd->rd_reserved < *nblocks) {
                fs_warn(sdp, "nblocks=%u\n", *nblocks);
+               spin_unlock(&rbm.rgd->rd_rsspin);
                goto rgrp_error;
        }
-
+       BUG_ON(rbm.rgd->rd_reserved < *nblocks);
+       BUG_ON(rbm.rgd->rd_free_clone < *nblocks);
+       BUG_ON(rbm.rgd->rd_free < *nblocks);
+       rbm.rgd->rd_reserved -= *nblocks;
+       rbm.rgd->rd_free_clone -= *nblocks;
        rbm.rgd->rd_free -= *nblocks;
+       spin_unlock(&rbm.rgd->rd_rsspin);
        if (dinode) {
                rbm.rgd->rd_dinodes++;
                *generation = rbm.rgd->rd_igeneration++;
@@ -2443,7 +2484,6 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
 
        gfs2_quota_change(ip, *nblocks, ip->i_inode.i_uid, ip->i_inode.i_gid);
 
-       rbm.rgd->rd_free_clone -= *nblocks;
        trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks,
                               dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
        *bn = block;
index d24bdcdd42e5002f70cf24bd5583af8aa3ce5987..bd6c8e9e49db0b0de36e0495b901e739b381342b 100644 (file)
@@ -561,6 +561,7 @@ TRACE_EVENT(gfs2_block_alloc,
                __field(        u64,    rd_addr                 )
                __field(        u32,    rd_free_clone           )
                __field(        u32,    rd_requested            )
+               __field(        u32,    rd_reserved             )
        ),
 
        TP_fast_assign(
@@ -572,16 +573,19 @@ TRACE_EVENT(gfs2_block_alloc,
                __entry->rd_addr        = rgd->rd_addr;
                __entry->rd_free_clone  = rgd->rd_free_clone;
                __entry->rd_requested   = rgd->rd_requested;
+               __entry->rd_reserved    = rgd->rd_reserved;
        ),
 
-       TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u rr:%lu",
+       TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u rq:%u rr:%u",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  (unsigned long long)__entry->inum,
                  (unsigned long long)__entry->start,
                  (unsigned long)__entry->len,
                  block_state_name(__entry->block_state),
                  (unsigned long long)__entry->rd_addr,
-                 __entry->rd_free_clone, (unsigned long)__entry->rd_requested)
+                 __entry->rd_free_clone,
+                 __entry->rd_requested,
+                 __entry->rd_reserved)
 );
 
 /* Keep track of multi-block reservations as they are allocated/freed */
@@ -596,9 +600,11 @@ TRACE_EVENT(gfs2_rs,
                __field(        u64,    rd_addr                 )
                __field(        u32,    rd_free_clone           )
                __field(        u32,    rd_requested            )
+               __field(        u32,    rd_reserved             )
                __field(        u64,    inum                    )
                __field(        u64,    start                   )
                __field(        u32,    requested               )
+               __field(        u32,    reserved                )
                __field(        u8,     func                    )
        ),
 
@@ -607,21 +613,26 @@ TRACE_EVENT(gfs2_rs,
                __entry->rd_addr        = rs->rs_rgd->rd_addr;
                __entry->rd_free_clone  = rs->rs_rgd->rd_free_clone;
                __entry->rd_requested   = rs->rs_rgd->rd_requested;
+               __entry->rd_reserved    = rs->rs_rgd->rd_reserved;
                __entry->inum           = container_of(rs, struct gfs2_inode,
                                                       i_res)->i_no_addr;
                __entry->start          = rs->rs_start;
                __entry->requested      = rs->rs_requested;
+               __entry->reserved       = rs->rs_reserved;
                __entry->func           = func;
        ),
 
-       TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s f:%lu",
+       TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%u rq:%u rr:%u %s q:%u r:%u",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  (unsigned long long)__entry->inum,
                  (unsigned long long)__entry->start,
                  (unsigned long long)__entry->rd_addr,
-                 (unsigned long)__entry->rd_free_clone,
-                 (unsigned long)__entry->rd_requested,
-                 rs_func_name(__entry->func), (unsigned long)__entry->requested)
+                 __entry->rd_free_clone,
+                 __entry->rd_requested,
+                 __entry->rd_reserved,
+                 rs_func_name(__entry->func),
+                 __entry->requested,
+                 __entry->reserved)
 );
 
 #endif /* _TRACE_GFS2_H */