From 725d0e9d464d567cd9290e29879d8bffc92013f8 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 2 Oct 2018 14:59:54 +0100 Subject: gfs2: Add per-reservation reserved block accounting Add a rs_reserved field to struct gfs2_blkreserv to keep track of the number of blocks reserved by this particular reservation, and a rd_reserved field to struct gfs2_rgrpd to keep track of the total number of reserved blocks in the resource group. Those blocks are exclusively reserved, as opposed to the rs_requested / rd_requested blocks which are tracked in the reservation tree (rd_rstree) and which can be stolen if necessary. When making a reservation with gfs2_inplace_reserve, rs_reserved is set to somewhere between ap->min_target and ap->target depending on the number of free blocks in the resource group. When allocating blocks with gfs2_alloc_blocks, rs_reserved is decremented accordingly. Eventually, any reserved but not consumed blocks are returned to the resource group by gfs2_inplace_release. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/file.c | 4 +-- fs/gfs2/incore.h | 2 ++ fs/gfs2/lops.c | 1 + fs/gfs2/rgrp.c | 80 +++++++++++++++++++++++++++++++++++++++------------- fs/gfs2/trace_gfs2.h | 23 +++++++++++---- 5 files changed, 82 insertions(+), 28 deletions(-) (limited to 'fs/gfs2') diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 177c4d74ca30..294087516ce0 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -1115,8 +1115,8 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t goto out_qunlock; /* check if the selected rgrp limits our max_blks further */ - if (ap.allowed && ap.allowed < max_blks) - max_blks = ap.allowed; + if (ip->i_res.rs_reserved < max_blks) + max_blks = ip->i_res.rs_reserved; /* Almost done. Calculate bytes that can be written using * max_blks. We also recompute max_bytes, data_blocks and diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 0640d0c70a75..2679ba54798c 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -107,6 +107,7 @@ struct gfs2_rgrpd { u32 rd_bitbytes; /* number of bytes in data bitmaps */ u32 rd_free; u32 rd_requested; /* number of blocks in rd_rstree */ + u32 rd_reserved; /* number of reserved blocks */ u32 rd_free_clone; u32 rd_dinodes; u64 rd_igeneration; @@ -292,6 +293,7 @@ struct gfs2_blkreserv { struct gfs2_rgrpd *rs_rgd; u64 rs_start; u32 rs_requested; + u32 rs_reserved; /* number of reserved blocks */ }; /* diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 3922b26264f5..802bc15f9f11 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -84,6 +84,7 @@ static void maybe_release_space(struct gfs2_bufdata *bd) bd->bd_bh->b_data + bi->bi_offset, bi->bi_bytes); clear_bit(GBF_FULL, &bi->bi_flags); rgd->rd_free_clone = rgd->rd_free; + BUG_ON(rgd->rd_free_clone < rgd->rd_reserved); rgd->rd_extfail_pt = rgd->rd_free; } diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index bc8d1ab9e07f..f1df5e75364a 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1229,6 +1229,7 @@ static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) rgrp_set_bitmap_flags(rgd); rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); rgd->rd_free_clone = rgd->rd_free; + BUG_ON(rgd->rd_reserved); /* max out the rgrp allocation failure point */ rgd->rd_extfail_pt = rgd->rd_free; } @@ -1278,6 +1279,7 @@ static int update_rgrp_lvb(struct gfs2_rgrpd *rgd) rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free); rgrp_set_bitmap_flags(rgd); rgd->rd_free_clone = rgd->rd_free; + BUG_ON(rgd->rd_reserved); /* max out the rgrp allocation failure point */ rgd->rd_extfail_pt = rgd->rd_free; rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes); @@ -1568,17 +1570,26 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, u64 goal; struct gfs2_blkreserv *rs = &ip->i_res; u32 extlen; - u32 free_blocks = rgd_free(rgd, rs); + u32 free_blocks, blocks_available; int ret; struct inode *inode = &ip->i_inode; + spin_lock(&rgd->rd_rsspin); + free_blocks = rgd_free(rgd, rs); + if (rgd->rd_free_clone < rgd->rd_requested) + free_blocks = 0; + blocks_available = rgd->rd_free_clone - rgd->rd_reserved; + if (rgd == rs->rs_rgd) + blocks_available += rs->rs_reserved; + spin_unlock(&rgd->rd_rsspin); + if (S_ISDIR(inode->i_mode)) extlen = 1; else { extlen = max_t(u32, atomic_read(&ip->i_sizehint), ap->target); extlen = clamp(extlen, (u32)RGRP_RSRV_MINBLKS, free_blocks); } - if ((rgd->rd_free_clone < rgd->rd_requested) || (free_blocks < extlen)) + if (free_blocks < extlen || blocks_available < extlen) return; /* Find bitmap block that contains bits for goal block */ @@ -2027,8 +2038,7 @@ static inline int fast_to_acquire(struct gfs2_rgrpd *rgd) * We try our best to find an rgrp that has at least ap->target blocks * available. After a couple of passes (loops == 2), the prospects of finding * such an rgrp diminish. At this stage, we return the first rgrp that has - * at least ap->min_target blocks available. Either way, we set ap->allowed to - * the number of blocks available in the chosen rgrp. + * at least ap->min_target blocks available. * * Returns: 0 on success, * -ENOMEM if a suitable rgrp can't be found @@ -2044,7 +2054,9 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) u64 last_unlinked = NO_BLOCK; u32 target = ap->target; int loops = 0; - u32 free_blocks, skip = 0; + u32 free_blocks, blocks_available, skip = 0; + + BUG_ON(rs->rs_reserved); if (sdp->sd_args.ar_rgrplvb) flags |= GL_SKIP; @@ -2065,6 +2077,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) return -EBADSLT; while (loops < 3) { + struct gfs2_rgrpd *rgd; + rg_locked = 1; if (!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl)) { @@ -2115,11 +2129,20 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) goto check_rgrp; /* If rgrp has enough free space, use it */ - free_blocks = rgd_free(rs->rs_rgd, rs); - if (free_blocks >= target) { - ap->allowed = free_blocks; - return 0; + rgd = rs->rs_rgd; + spin_lock(&rgd->rd_rsspin); + free_blocks = rgd_free(rgd, rs); + blocks_available = rgd->rd_free_clone - rgd->rd_reserved; + if (free_blocks < target || blocks_available < target) { + spin_unlock(&rgd->rd_rsspin); + goto check_rgrp; } + rs->rs_reserved = ap->target; + if (rs->rs_reserved > blocks_available) + rs->rs_reserved = blocks_available; + rgd->rd_reserved += rs->rs_reserved; + spin_unlock(&rgd->rd_rsspin); + return 0; check_rgrp: /* Check for unlinked inodes which can be reclaimed */ if (rs->rs_rgd->rd_flags & GFS2_RDF_CHECK) @@ -2172,6 +2195,17 @@ next_rgrp: void gfs2_inplace_release(struct gfs2_inode *ip) { + struct gfs2_blkreserv *rs = &ip->i_res; + + if (rs->rs_reserved) { + struct gfs2_rgrpd *rgd = rs->rs_rgd; + + spin_lock(&rgd->rd_rsspin); + BUG_ON(rgd->rd_reserved < rs->rs_reserved); + rgd->rd_reserved -= rs->rs_reserved; + spin_unlock(&rgd->rd_rsspin); + rs->rs_reserved = 0; + } if (gfs2_holder_initialized(&ip->i_rgd_gh)) gfs2_glock_dq_uninit(&ip->i_rgd_gh); } @@ -2259,11 +2293,11 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd, struct gfs2_blkreserv *trs; const struct rb_node *n; - gfs2_print_dbg(seq, "%s R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n", + gfs2_print_dbg(seq, "%s R: n:%llu f:%02x b:%u/%u i:%u q:%u r:%u e:%u\n", fs_id_buf, (unsigned long long)rgd->rd_addr, rgd->rd_flags, rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes, - rgd->rd_requested, rgd->rd_extfail_pt); + rgd->rd_requested, rgd->rd_reserved, rgd->rd_extfail_pt); if (rgd->rd_sbd->sd_args.ar_rgrplvb) { struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl; @@ -2310,7 +2344,8 @@ static void gfs2_adjust_reservation(struct gfs2_inode *ip, struct gfs2_blkreserv *rs = &ip->i_res; struct gfs2_rgrpd *rgd = rbm->rgd; - spin_lock(&rgd->rd_rsspin); + BUG_ON(rs->rs_reserved < len); + rs->rs_reserved -= len; if (gfs2_rs_active(rs)) { u64 start = gfs2_rbm_to_block(rbm); @@ -2324,15 +2359,13 @@ static void gfs2_adjust_reservation(struct gfs2_inode *ip, trace_gfs2_rs(rs, TRACE_RS_CLAIM); if (rs->rs_start < rgd->rd_data0 + rgd->rd_data && rs->rs_requested) - goto out; + return; /* We used up our block reservation, so we should reserve more blocks next time. */ atomic_add(RGRP_RSRV_ADDBLKS, &ip->i_sizehint); } __rs_deltree(rs); } -out: - spin_unlock(&rgd->rd_rsspin); } /** @@ -2386,6 +2419,8 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, u32 minext = 1; int error = -ENOSPC; + BUG_ON(ip->i_res.rs_reserved < *nblocks); + if (gfs2_rs_active(&ip->i_res)) { gfs2_set_alloc_start(&rbm, ip, dinode); error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &minext, &ip->i_res, false); @@ -2407,8 +2442,6 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, gfs2_alloc_extent(&rbm, dinode, nblocks); block = gfs2_rbm_to_block(&rbm); rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0; - if (gfs2_rs_active(&ip->i_res)) - gfs2_adjust_reservation(ip, &rbm, *nblocks); if (!dinode) { ip->i_goal = block + *nblocks - 1; error = gfs2_meta_inode_buffer(ip, &dibh); @@ -2421,12 +2454,20 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, brelse(dibh); } } - if (rbm.rgd->rd_free < *nblocks) { + spin_lock(&rbm.rgd->rd_rsspin); + gfs2_adjust_reservation(ip, &rbm, *nblocks); + if (rbm.rgd->rd_free < *nblocks || rbm.rgd->rd_reserved < *nblocks) { fs_warn(sdp, "nblocks=%u\n", *nblocks); + spin_unlock(&rbm.rgd->rd_rsspin); goto rgrp_error; } - + BUG_ON(rbm.rgd->rd_reserved < *nblocks); + BUG_ON(rbm.rgd->rd_free_clone < *nblocks); + BUG_ON(rbm.rgd->rd_free < *nblocks); + rbm.rgd->rd_reserved -= *nblocks; + rbm.rgd->rd_free_clone -= *nblocks; rbm.rgd->rd_free -= *nblocks; + spin_unlock(&rbm.rgd->rd_rsspin); if (dinode) { rbm.rgd->rd_dinodes++; *generation = rbm.rgd->rd_igeneration++; @@ -2443,7 +2484,6 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, gfs2_quota_change(ip, *nblocks, ip->i_inode.i_uid, ip->i_inode.i_gid); - rbm.rgd->rd_free_clone -= *nblocks; trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); *bn = block; diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index d24bdcdd42e5..bd6c8e9e49db 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h @@ -561,6 +561,7 @@ TRACE_EVENT(gfs2_block_alloc, __field( u64, rd_addr ) __field( u32, rd_free_clone ) __field( u32, rd_requested ) + __field( u32, rd_reserved ) ), TP_fast_assign( @@ -572,16 +573,19 @@ TRACE_EVENT(gfs2_block_alloc, __entry->rd_addr = rgd->rd_addr; __entry->rd_free_clone = rgd->rd_free_clone; __entry->rd_requested = rgd->rd_requested; + __entry->rd_reserved = rgd->rd_reserved; ), - TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u rr:%lu", + TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u rq:%u rr:%u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->inum, (unsigned long long)__entry->start, (unsigned long)__entry->len, block_state_name(__entry->block_state), (unsigned long long)__entry->rd_addr, - __entry->rd_free_clone, (unsigned long)__entry->rd_requested) + __entry->rd_free_clone, + __entry->rd_requested, + __entry->rd_reserved) ); /* Keep track of multi-block reservations as they are allocated/freed */ @@ -596,9 +600,11 @@ TRACE_EVENT(gfs2_rs, __field( u64, rd_addr ) __field( u32, rd_free_clone ) __field( u32, rd_requested ) + __field( u32, rd_reserved ) __field( u64, inum ) __field( u64, start ) __field( u32, requested ) + __field( u32, reserved ) __field( u8, func ) ), @@ -607,21 +613,26 @@ TRACE_EVENT(gfs2_rs, __entry->rd_addr = rs->rs_rgd->rd_addr; __entry->rd_free_clone = rs->rs_rgd->rd_free_clone; __entry->rd_requested = rs->rs_rgd->rd_requested; + __entry->rd_reserved = rs->rs_rgd->rd_reserved; __entry->inum = container_of(rs, struct gfs2_inode, i_res)->i_no_addr; __entry->start = rs->rs_start; __entry->requested = rs->rs_requested; + __entry->reserved = rs->rs_reserved; __entry->func = func; ), - TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s f:%lu", + TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%u rq:%u rr:%u %s q:%u r:%u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->inum, (unsigned long long)__entry->start, (unsigned long long)__entry->rd_addr, - (unsigned long)__entry->rd_free_clone, - (unsigned long)__entry->rd_requested, - rs_func_name(__entry->func), (unsigned long)__entry->requested) + __entry->rd_free_clone, + __entry->rd_requested, + __entry->rd_reserved, + rs_func_name(__entry->func), + __entry->requested, + __entry->reserved) ); #endif /* _TRACE_GFS2_H */ -- cgit v1.2.3