From e7501bf88cd77ed3a1bc65c451600a847c80485b Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sat, 19 Dec 2020 03:11:51 +0100 Subject: gfs2: Rename gfs2_{write => flush}_revokes Function gfs2_write_revokes doesn't actually write any revokes; instead, it adds revokes to the system transaction during a flush. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/lops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/gfs2/lops.c') diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 3922b26264f5..8658ebbcb4a9 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -845,7 +845,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) struct page *page; unsigned int length; - gfs2_write_revokes(sdp); + gfs2_flush_revokes(sdp); if (!sdp->sd_log_num_revoke) return; -- cgit v1.2.3 From 725d0e9d464d567cd9290e29879d8bffc92013f8 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 2 Oct 2018 14:59:54 +0100 Subject: gfs2: Add per-reservation reserved block accounting Add a rs_reserved field to struct gfs2_blkreserv to keep track of the number of blocks reserved by this particular reservation, and a rd_reserved field to struct gfs2_rgrpd to keep track of the total number of reserved blocks in the resource group. Those blocks are exclusively reserved, as opposed to the rs_requested / rd_requested blocks which are tracked in the reservation tree (rd_rstree) and which can be stolen if necessary. When making a reservation with gfs2_inplace_reserve, rs_reserved is set to somewhere between ap->min_target and ap->target depending on the number of free blocks in the resource group. When allocating blocks with gfs2_alloc_blocks, rs_reserved is decremented accordingly. Eventually, any reserved but not consumed blocks are returned to the resource group by gfs2_inplace_release. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/file.c | 4 +-- fs/gfs2/incore.h | 2 ++ fs/gfs2/lops.c | 1 + fs/gfs2/rgrp.c | 80 +++++++++++++++++++++++++++++++++++++++------------- fs/gfs2/trace_gfs2.h | 23 +++++++++++---- 5 files changed, 82 insertions(+), 28 deletions(-) (limited to 'fs/gfs2/lops.c') diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 177c4d74ca30..294087516ce0 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -1115,8 +1115,8 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t goto out_qunlock; /* check if the selected rgrp limits our max_blks further */ - if (ap.allowed && ap.allowed < max_blks) - max_blks = ap.allowed; + if (ip->i_res.rs_reserved < max_blks) + max_blks = ip->i_res.rs_reserved; /* Almost done. Calculate bytes that can be written using * max_blks. We also recompute max_bytes, data_blocks and diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 0640d0c70a75..2679ba54798c 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -107,6 +107,7 @@ struct gfs2_rgrpd { u32 rd_bitbytes; /* number of bytes in data bitmaps */ u32 rd_free; u32 rd_requested; /* number of blocks in rd_rstree */ + u32 rd_reserved; /* number of reserved blocks */ u32 rd_free_clone; u32 rd_dinodes; u64 rd_igeneration; @@ -292,6 +293,7 @@ struct gfs2_blkreserv { struct gfs2_rgrpd *rs_rgd; u64 rs_start; u32 rs_requested; + u32 rs_reserved; /* number of reserved blocks */ }; /* diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 3922b26264f5..802bc15f9f11 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -84,6 +84,7 @@ static void maybe_release_space(struct gfs2_bufdata *bd) bd->bd_bh->b_data + bi->bi_offset, bi->bi_bytes); clear_bit(GBF_FULL, &bi->bi_flags); rgd->rd_free_clone = rgd->rd_free; + BUG_ON(rgd->rd_free_clone < rgd->rd_reserved); rgd->rd_extfail_pt = rgd->rd_free; } diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index bc8d1ab9e07f..f1df5e75364a 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1229,6 +1229,7 @@ static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) rgrp_set_bitmap_flags(rgd); rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); rgd->rd_free_clone = rgd->rd_free; + BUG_ON(rgd->rd_reserved); /* max out the rgrp allocation failure point */ rgd->rd_extfail_pt = rgd->rd_free; } @@ -1278,6 +1279,7 @@ static int update_rgrp_lvb(struct gfs2_rgrpd *rgd) rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free); rgrp_set_bitmap_flags(rgd); rgd->rd_free_clone = rgd->rd_free; + BUG_ON(rgd->rd_reserved); /* max out the rgrp allocation failure point */ rgd->rd_extfail_pt = rgd->rd_free; rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes); @@ -1568,17 +1570,26 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, u64 goal; struct gfs2_blkreserv *rs = &ip->i_res; u32 extlen; - u32 free_blocks = rgd_free(rgd, rs); + u32 free_blocks, blocks_available; int ret; struct inode *inode = &ip->i_inode; + spin_lock(&rgd->rd_rsspin); + free_blocks = rgd_free(rgd, rs); + if (rgd->rd_free_clone < rgd->rd_requested) + free_blocks = 0; + blocks_available = rgd->rd_free_clone - rgd->rd_reserved; + if (rgd == rs->rs_rgd) + blocks_available += rs->rs_reserved; + spin_unlock(&rgd->rd_rsspin); + if (S_ISDIR(inode->i_mode)) extlen = 1; else { extlen = max_t(u32, atomic_read(&ip->i_sizehint), ap->target); extlen = clamp(extlen, (u32)RGRP_RSRV_MINBLKS, free_blocks); } - if ((rgd->rd_free_clone < rgd->rd_requested) || (free_blocks < extlen)) + if (free_blocks < extlen || blocks_available < extlen) return; /* Find bitmap block that contains bits for goal block */ @@ -2027,8 +2038,7 @@ static inline int fast_to_acquire(struct gfs2_rgrpd *rgd) * We try our best to find an rgrp that has at least ap->target blocks * available. After a couple of passes (loops == 2), the prospects of finding * such an rgrp diminish. At this stage, we return the first rgrp that has - * at least ap->min_target blocks available. Either way, we set ap->allowed to - * the number of blocks available in the chosen rgrp. + * at least ap->min_target blocks available. * * Returns: 0 on success, * -ENOMEM if a suitable rgrp can't be found @@ -2044,7 +2054,9 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) u64 last_unlinked = NO_BLOCK; u32 target = ap->target; int loops = 0; - u32 free_blocks, skip = 0; + u32 free_blocks, blocks_available, skip = 0; + + BUG_ON(rs->rs_reserved); if (sdp->sd_args.ar_rgrplvb) flags |= GL_SKIP; @@ -2065,6 +2077,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) return -EBADSLT; while (loops < 3) { + struct gfs2_rgrpd *rgd; + rg_locked = 1; if (!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl)) { @@ -2115,11 +2129,20 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) goto check_rgrp; /* If rgrp has enough free space, use it */ - free_blocks = rgd_free(rs->rs_rgd, rs); - if (free_blocks >= target) { - ap->allowed = free_blocks; - return 0; + rgd = rs->rs_rgd; + spin_lock(&rgd->rd_rsspin); + free_blocks = rgd_free(rgd, rs); + blocks_available = rgd->rd_free_clone - rgd->rd_reserved; + if (free_blocks < target || blocks_available < target) { + spin_unlock(&rgd->rd_rsspin); + goto check_rgrp; } + rs->rs_reserved = ap->target; + if (rs->rs_reserved > blocks_available) + rs->rs_reserved = blocks_available; + rgd->rd_reserved += rs->rs_reserved; + spin_unlock(&rgd->rd_rsspin); + return 0; check_rgrp: /* Check for unlinked inodes which can be reclaimed */ if (rs->rs_rgd->rd_flags & GFS2_RDF_CHECK) @@ -2172,6 +2195,17 @@ next_rgrp: void gfs2_inplace_release(struct gfs2_inode *ip) { + struct gfs2_blkreserv *rs = &ip->i_res; + + if (rs->rs_reserved) { + struct gfs2_rgrpd *rgd = rs->rs_rgd; + + spin_lock(&rgd->rd_rsspin); + BUG_ON(rgd->rd_reserved < rs->rs_reserved); + rgd->rd_reserved -= rs->rs_reserved; + spin_unlock(&rgd->rd_rsspin); + rs->rs_reserved = 0; + } if (gfs2_holder_initialized(&ip->i_rgd_gh)) gfs2_glock_dq_uninit(&ip->i_rgd_gh); } @@ -2259,11 +2293,11 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd, struct gfs2_blkreserv *trs; const struct rb_node *n; - gfs2_print_dbg(seq, "%s R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n", + gfs2_print_dbg(seq, "%s R: n:%llu f:%02x b:%u/%u i:%u q:%u r:%u e:%u\n", fs_id_buf, (unsigned long long)rgd->rd_addr, rgd->rd_flags, rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes, - rgd->rd_requested, rgd->rd_extfail_pt); + rgd->rd_requested, rgd->rd_reserved, rgd->rd_extfail_pt); if (rgd->rd_sbd->sd_args.ar_rgrplvb) { struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl; @@ -2310,7 +2344,8 @@ static void gfs2_adjust_reservation(struct gfs2_inode *ip, struct gfs2_blkreserv *rs = &ip->i_res; struct gfs2_rgrpd *rgd = rbm->rgd; - spin_lock(&rgd->rd_rsspin); + BUG_ON(rs->rs_reserved < len); + rs->rs_reserved -= len; if (gfs2_rs_active(rs)) { u64 start = gfs2_rbm_to_block(rbm); @@ -2324,15 +2359,13 @@ static void gfs2_adjust_reservation(struct gfs2_inode *ip, trace_gfs2_rs(rs, TRACE_RS_CLAIM); if (rs->rs_start < rgd->rd_data0 + rgd->rd_data && rs->rs_requested) - goto out; + return; /* We used up our block reservation, so we should reserve more blocks next time. */ atomic_add(RGRP_RSRV_ADDBLKS, &ip->i_sizehint); } __rs_deltree(rs); } -out: - spin_unlock(&rgd->rd_rsspin); } /** @@ -2386,6 +2419,8 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, u32 minext = 1; int error = -ENOSPC; + BUG_ON(ip->i_res.rs_reserved < *nblocks); + if (gfs2_rs_active(&ip->i_res)) { gfs2_set_alloc_start(&rbm, ip, dinode); error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &minext, &ip->i_res, false); @@ -2407,8 +2442,6 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, gfs2_alloc_extent(&rbm, dinode, nblocks); block = gfs2_rbm_to_block(&rbm); rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0; - if (gfs2_rs_active(&ip->i_res)) - gfs2_adjust_reservation(ip, &rbm, *nblocks); if (!dinode) { ip->i_goal = block + *nblocks - 1; error = gfs2_meta_inode_buffer(ip, &dibh); @@ -2421,12 +2454,20 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, brelse(dibh); } } - if (rbm.rgd->rd_free < *nblocks) { + spin_lock(&rbm.rgd->rd_rsspin); + gfs2_adjust_reservation(ip, &rbm, *nblocks); + if (rbm.rgd->rd_free < *nblocks || rbm.rgd->rd_reserved < *nblocks) { fs_warn(sdp, "nblocks=%u\n", *nblocks); + spin_unlock(&rbm.rgd->rd_rsspin); goto rgrp_error; } - + BUG_ON(rbm.rgd->rd_reserved < *nblocks); + BUG_ON(rbm.rgd->rd_free_clone < *nblocks); + BUG_ON(rbm.rgd->rd_free < *nblocks); + rbm.rgd->rd_reserved -= *nblocks; + rbm.rgd->rd_free_clone -= *nblocks; rbm.rgd->rd_free -= *nblocks; + spin_unlock(&rbm.rgd->rd_rsspin); if (dinode) { rbm.rgd->rd_dinodes++; *generation = rbm.rgd->rd_igeneration++; @@ -2443,7 +2484,6 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, gfs2_quota_change(ip, *nblocks, ip->i_inode.i_uid, ip->i_inode.i_gid); - rbm.rgd->rd_free_clone -= *nblocks; trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); *bn = block; diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index d24bdcdd42e5..bd6c8e9e49db 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h @@ -561,6 +561,7 @@ TRACE_EVENT(gfs2_block_alloc, __field( u64, rd_addr ) __field( u32, rd_free_clone ) __field( u32, rd_requested ) + __field( u32, rd_reserved ) ), TP_fast_assign( @@ -572,16 +573,19 @@ TRACE_EVENT(gfs2_block_alloc, __entry->rd_addr = rgd->rd_addr; __entry->rd_free_clone = rgd->rd_free_clone; __entry->rd_requested = rgd->rd_requested; + __entry->rd_reserved = rgd->rd_reserved; ), - TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u rr:%lu", + TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u rq:%u rr:%u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->inum, (unsigned long long)__entry->start, (unsigned long)__entry->len, block_state_name(__entry->block_state), (unsigned long long)__entry->rd_addr, - __entry->rd_free_clone, (unsigned long)__entry->rd_requested) + __entry->rd_free_clone, + __entry->rd_requested, + __entry->rd_reserved) ); /* Keep track of multi-block reservations as they are allocated/freed */ @@ -596,9 +600,11 @@ TRACE_EVENT(gfs2_rs, __field( u64, rd_addr ) __field( u32, rd_free_clone ) __field( u32, rd_requested ) + __field( u32, rd_reserved ) __field( u64, inum ) __field( u64, start ) __field( u32, requested ) + __field( u32, reserved ) __field( u8, func ) ), @@ -607,21 +613,26 @@ TRACE_EVENT(gfs2_rs, __entry->rd_addr = rs->rs_rgd->rd_addr; __entry->rd_free_clone = rs->rs_rgd->rd_free_clone; __entry->rd_requested = rs->rs_rgd->rd_requested; + __entry->rd_reserved = rs->rs_rgd->rd_reserved; __entry->inum = container_of(rs, struct gfs2_inode, i_res)->i_no_addr; __entry->start = rs->rs_start; __entry->requested = rs->rs_requested; + __entry->reserved = rs->rs_reserved; __entry->func = func; ), - TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s f:%lu", + TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%u rq:%u rr:%u %s q:%u r:%u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->inum, (unsigned long long)__entry->start, (unsigned long long)__entry->rd_addr, - (unsigned long)__entry->rd_free_clone, - (unsigned long)__entry->rd_requested, - rs_func_name(__entry->func), (unsigned long)__entry->requested) + __entry->rd_free_clone, + __entry->rd_requested, + __entry->rd_reserved, + rs_func_name(__entry->func), + __entry->requested, + __entry->reserved) ); #endif /* _TRACE_GFS2_H */ -- cgit v1.2.3 From 9e514605c77451745ea9fca5a26fc3153893686a Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 8 Feb 2021 20:44:26 +0100 Subject: gfs2: Add local resource group locking Prepare for treating resource group glocks as exclusive among nodes but shared among all tasks running on a node: introduce another layer of node-specific locking that the local tasks can use to coordinate their accesses. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/incore.h | 2 ++ fs/gfs2/lops.c | 6 +++++- fs/gfs2/rgrp.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++------ fs/gfs2/rgrp.h | 4 ++++ 4 files changed, 59 insertions(+), 7 deletions(-) (limited to 'fs/gfs2/lops.c') diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 2679ba54798c..92f5a920ce61 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -20,6 +20,7 @@ #include #include #include +#include #define DIO_WAIT 0x00000010 #define DIO_METADATA 0x00000020 @@ -123,6 +124,7 @@ struct gfs2_rgrpd { #define GFS2_RDF_PREFERRED 0x80000000 /* This rgrp is preferred */ #define GFS2_RDF_MASK 0xf0000000 /* mask for internal flags */ spinlock_t rd_rsspin; /* protects reservation related vars */ + struct mutex rd_mutex; struct rb_root rd_rstree; /* multi-block reservation tree */ }; diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 802bc15f9f11..dffa6cc8b5b8 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -76,8 +76,9 @@ static void maybe_release_space(struct gfs2_bufdata *bd) unsigned int index = bd->bd_bh->b_blocknr - gl->gl_name.ln_number; struct gfs2_bitmap *bi = rgd->rd_bits + index; + rgrp_lock_local(rgd); if (bi->bi_clone == NULL) - return; + goto out; if (sdp->sd_args.ar_discard) gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi, 1, NULL); memcpy(bi->bi_clone + bi->bi_offset, @@ -86,6 +87,9 @@ static void maybe_release_space(struct gfs2_bufdata *bd) rgd->rd_free_clone = rgd->rd_free; BUG_ON(rgd->rd_free_clone < rgd->rd_reserved); rgd->rd_extfail_pt = rgd->rd_free; + +out: + rgrp_unlock_local(rgd); } /** diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index f1df5e75364a..f45b787307d0 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -920,6 +920,7 @@ static int read_rindex_entry(struct gfs2_inode *ip) rgd->rd_data = be32_to_cpu(buf.ri_data); rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes); spin_lock_init(&rgd->rd_rsspin); + mutex_init(&rgd->rd_mutex); error = compute_bitstructs(rgd); if (error) @@ -1449,9 +1450,11 @@ int gfs2_fitrim(struct file *filp, void __user *argp) /* Trim each bitmap in the rgrp */ for (x = 0; x < rgd->rd_length; x++) { struct gfs2_bitmap *bi = rgd->rd_bits + x; + rgrp_lock_local(rgd); ret = gfs2_rgrp_send_discards(sdp, rgd->rd_data0, NULL, bi, minlen, &amt); + rgrp_unlock_local(rgd); if (ret) { gfs2_glock_dq_uninit(&gh); goto out; @@ -1463,9 +1466,11 @@ int gfs2_fitrim(struct file *filp, void __user *argp) ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0); if (ret == 0) { bh = rgd->rd_bits[0].bi_bh; + rgrp_lock_local(rgd); rgd->rd_flags |= GFS2_RGF_TRIMMED; gfs2_trans_add_meta(rgd->rd_gl, bh); gfs2_rgrp_out(rgd, bh->b_data); + rgrp_unlock_local(rgd); gfs2_trans_end(sdp); } } @@ -2050,7 +2055,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrpd *begin = NULL; struct gfs2_blkreserv *rs = &ip->i_res; - int error = 0, rg_locked, flags = 0; + int error = 0, flags = 0; + bool rg_locked; u64 last_unlinked = NO_BLOCK; u32 target = ap->target; int loops = 0; @@ -2079,10 +2085,10 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) while (loops < 3) { struct gfs2_rgrpd *rgd; - rg_locked = 1; - - if (!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl)) { - rg_locked = 0; + rg_locked = gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl); + if (rg_locked) { + rgrp_lock_local(rs->rs_rgd); + } else { if (skip && skip--) goto next_rgrp; if (!gfs2_rs_active(rs)) { @@ -2099,12 +2105,14 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) &ip->i_rgd_gh); if (unlikely(error)) return error; + rgrp_lock_local(rs->rs_rgd); if (!gfs2_rs_active(rs) && (loops < 2) && gfs2_rgrp_congested(rs->rs_rgd, loops)) goto skip_rgrp; if (sdp->sd_args.ar_rgrplvb) { error = update_rgrp_lvb(rs->rs_rgd); if (unlikely(error)) { + rgrp_unlock_local(rs->rs_rgd); gfs2_glock_dq_uninit(&ip->i_rgd_gh); return error; } @@ -2142,6 +2150,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap) rs->rs_reserved = blocks_available; rgd->rd_reserved += rs->rs_reserved; spin_unlock(&rgd->rd_rsspin); + rgrp_unlock_local(rs->rs_rgd); return 0; check_rgrp: /* Check for unlinked inodes which can be reclaimed */ @@ -2149,6 +2158,8 @@ check_rgrp: try_rgrp_unlink(rs->rs_rgd, &last_unlinked, ip->i_no_addr); skip_rgrp: + rgrp_unlock_local(rs->rs_rgd); + /* Drop reservation, if we couldn't use reserved rgrp */ if (gfs2_rs_active(rs)) gfs2_rs_deltree(rs); @@ -2293,6 +2304,7 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd, struct gfs2_blkreserv *trs; const struct rb_node *n; + spin_lock(&rgd->rd_rsspin); gfs2_print_dbg(seq, "%s R: n:%llu f:%02x b:%u/%u i:%u q:%u r:%u e:%u\n", fs_id_buf, (unsigned long long)rgd->rd_addr, rgd->rd_flags, @@ -2306,7 +2318,6 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd, be32_to_cpu(rgl->rl_free), be32_to_cpu(rgl->rl_dinodes)); } - spin_lock(&rgd->rd_rsspin); for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) { trs = rb_entry(n, struct gfs2_blkreserv, rs_node); dump_rs(seq, trs, fs_id_buf); @@ -2421,6 +2432,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, BUG_ON(ip->i_res.rs_reserved < *nblocks); + rgrp_lock_local(rbm.rgd); if (gfs2_rs_active(&ip->i_res)) { gfs2_set_alloc_start(&rbm, ip, dinode); error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &minext, &ip->i_res, false); @@ -2477,6 +2489,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh); gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data); + rgrp_unlock_local(rbm.rgd); gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0); if (dinode) @@ -2490,6 +2503,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, return 0; rgrp_error: + rgrp_unlock_local(rbm.rgd); gfs2_rgrp_error(rbm.rgd); return -EIO; } @@ -2509,12 +2523,14 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + rgrp_lock_local(rgd); rgblk_free(sdp, rgd, bstart, blen, GFS2_BLKST_FREE); trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE); rgd->rd_free += blen; rgd->rd_flags &= ~GFS2_RGF_TRIMMED; gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); + rgrp_unlock_local(rgd); /* Directories keep their data in the metadata address space */ if (meta || ip->i_depth || gfs2_is_jdata(ip)) @@ -2550,17 +2566,20 @@ void gfs2_unlink_di(struct inode *inode) rgd = gfs2_blk2rgrpd(sdp, blkno, true); if (!rgd) return; + rgrp_lock_local(rgd); rgblk_free(sdp, rgd, blkno, 1, GFS2_BLKST_UNLINKED); trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED); gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); be32_add_cpu(&rgd->rd_rgl->rl_unlinked, 1); + rgrp_unlock_local(rgd); } void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) { struct gfs2_sbd *sdp = rgd->rd_sbd; + rgrp_lock_local(rgd); rgblk_free(sdp, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE); if (!rgd->rd_dinodes) gfs2_consist_rgrpd(rgd); @@ -2569,6 +2588,7 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); + rgrp_unlock_local(rgd); be32_add_cpu(&rgd->rd_rgl->rl_unlinked, -1); gfs2_statfs_change(sdp, 0, +1, -1); @@ -2583,6 +2603,10 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) * @no_addr: The block number to check * @type: The block type we are looking for * + * The inode glock of @no_addr must be held. The @type to check for is either + * GFS2_BLKST_DINODE or GFS2_BLKST_UNLINKED; checking for type GFS2_BLKST_FREE + * or GFS2_BLKST_USED would make no sense. + * * Returns: 0 if the block type matches the expected type * -ESTALE if it doesn't match * or -ve errno if something went wrong while checking @@ -2606,6 +2630,13 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type) rbm.rgd = rgd; error = gfs2_rbm_from_block(&rbm, no_addr); if (!WARN_ON_ONCE(error)) { + /* + * No need to take the local resource group lock here; the + * inode glock of @no_addr provides the necessary + * synchronization in case the block is an inode. (In case + * the block is not an inode, the block type will not match + * the @type we are looking for.) + */ if (gfs2_testbit(&rbm, false) != type) error = -ESTALE; } @@ -2730,3 +2761,14 @@ void gfs2_rlist_free(struct gfs2_rgrp_list *rlist) } } +void rgrp_lock_local(struct gfs2_rgrpd *rgd) +{ + BUG_ON(!gfs2_glock_is_held_excl(rgd->rd_gl) && + !test_bit(SDF_NORECOVERY, &rgd->rd_sbd->sd_flags)); + mutex_lock(&rgd->rd_mutex); +} + +void rgrp_unlock_local(struct gfs2_rgrpd *rgd) +{ + mutex_unlock(&rgd->rd_mutex); +} diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index be1b2034f5ee..a6855fd796e0 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -88,4 +88,8 @@ static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block) } extern void check_and_update_goal(struct gfs2_inode *ip); + +extern void rgrp_lock_local(struct gfs2_rgrpd *rgd); +extern void rgrp_unlock_local(struct gfs2_rgrpd *rgd); + #endif /* __RGRP_DOT_H__ */ -- cgit v1.2.3 From 2129b4288852cf872c42870c7f6e813ce0611199 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 17 Dec 2020 16:14:30 +0100 Subject: gfs2: Per-revoke accounting in transactions In the log, revokes are stored as a revoke descriptor (struct gfs2_log_descriptor), followed by zero or more additional revoke blocks (struct gfs2_meta_header). On filesystems with a blocksize of 4k, the revoke descriptor contains up to 503 revokes, and the metadata blocks contain up to 509 revokes each. We've so far been reserving space for revokes in transactions in block granularity, so a lot more space than necessary was being allocated and then released again. This patch switches to assigning revokes to transactions individually instead. Initially, space for the revoke descriptor is reserved and handed out to transactions. When more revokes than that are reserved, additional revoke blocks are added. When the log is flushed, the space for the additional revoke blocks is released, but we keep the space for the revoke descriptor block allocated. Transactions may still reserve more revokes than they will actually need in the end, but now we won't overshoot the target as much, and by only returning the space for excess revokes at log flush time, we further reduce the amount of contention between processes. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glops.c | 8 +--- fs/gfs2/incore.h | 3 +- fs/gfs2/log.c | 114 ++++++++++++++++++++++++++++++++++++++++----------- fs/gfs2/log.h | 6 ++- fs/gfs2/lops.c | 1 - fs/gfs2/ops_fstype.c | 7 ++++ fs/gfs2/trans.c | 34 +++++++++++---- 7 files changed, 131 insertions(+), 42 deletions(-) (limited to 'fs/gfs2/lops.c') diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index a067924341e3..8e32d569c8bf 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -136,19 +136,15 @@ void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; unsigned int revokes = atomic_read(&gl->gl_ail_count); - unsigned int max_revokes = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(u64); int ret; if (!revokes) return; - while (revokes > max_revokes) - max_revokes += (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / sizeof(u64); - - ret = gfs2_trans_begin(sdp, 0, max_revokes); + ret = gfs2_trans_begin(sdp, 0, revokes); if (ret) return; - __gfs2_ail_flush(gl, fsync, max_revokes); + __gfs2_ail_flush(gl, fsync, revokes); gfs2_trans_end(sdp); gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_AIL_FLUSH); diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 51656b053170..30f67c3c87b0 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -506,7 +506,6 @@ struct gfs2_trans { unsigned int tr_num_buf_rm; unsigned int tr_num_databuf_rm; unsigned int tr_num_revoke; - unsigned int tr_num_revoke_rm; struct list_head tr_list; struct list_head tr_databuf; @@ -821,7 +820,6 @@ struct gfs2_sbd { struct gfs2_trans *sd_log_tr; unsigned int sd_log_blks_reserved; - int sd_log_committed_revoke; atomic_t sd_log_pinned; unsigned int sd_log_num_revoke; @@ -834,6 +832,7 @@ struct gfs2_sbd { atomic_t sd_log_thresh2; atomic_t sd_log_blks_free; atomic_t sd_log_blks_needed; + atomic_t sd_log_revokes_available; wait_queue_head_t sd_log_waitq; wait_queue_head_t sd_logd_waitq; diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 913150e60f52..68e7afdd19a8 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -429,6 +429,32 @@ bool gfs2_log_is_empty(struct gfs2_sbd *sdp) { return atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks; } +static bool __gfs2_log_try_reserve_revokes(struct gfs2_sbd *sdp, unsigned int revokes) +{ + unsigned int available; + + available = atomic_read(&sdp->sd_log_revokes_available); + while (available >= revokes) { + if (atomic_try_cmpxchg(&sdp->sd_log_revokes_available, + &available, available - revokes)) + return true; + } + return false; +} + +/** + * gfs2_log_release_revokes - Release a given number of revokes + * @sdp: The GFS2 superblock + * @revokes: The number of revokes to release + * + * sdp->sd_log_flush_lock must be held. + */ +void gfs2_log_release_revokes(struct gfs2_sbd *sdp, unsigned int revokes) +{ + if (revokes) + atomic_add(revokes, &sdp->sd_log_revokes_available); +} + /** * gfs2_log_release - Release a given number of log blocks * @sdp: The GFS2 superblock @@ -519,15 +545,59 @@ reserved: } /** - * gfs2_log_reserve - Make a log reservation + * gfs2_log_try_reserve - Try to make a log reservation * @sdp: The GFS2 superblock - * @blks: The number of blocks to reserve + * @tr: The transaction + * @extra_revokes: The number of additional revokes reserved (output) + * + * This is similar to gfs2_log_reserve, but sdp->sd_log_flush_lock must be + * held for correct revoke accounting. */ -void gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks) +bool gfs2_log_try_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr, + unsigned int *extra_revokes) { + unsigned int blks = tr->tr_reserved; + unsigned int revokes = tr->tr_revokes; + unsigned int revoke_blks = 0; + + *extra_revokes = 0; + if (revokes && !__gfs2_log_try_reserve_revokes(sdp, revokes)) { + revoke_blks = DIV_ROUND_UP(revokes, sdp->sd_inptrs); + *extra_revokes = revoke_blks * sdp->sd_inptrs - revokes; + blks += revoke_blks; + } + if (!blks) + return true; if (__gfs2_log_try_reserve(sdp, blks, GFS2_LOG_FLUSH_MIN_BLOCKS)) - return; + return true; + if (!revoke_blks) + gfs2_log_release_revokes(sdp, revokes); + return false; +} + +/** + * gfs2_log_reserve - Make a log reservation + * @sdp: The GFS2 superblock + * @tr: The transaction + * @extra_revokes: The number of additional revokes reserved (output) + * + * sdp->sd_log_flush_lock must not be held. + */ + +void gfs2_log_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr, + unsigned int *extra_revokes) +{ + unsigned int blks = tr->tr_reserved; + unsigned int revokes = tr->tr_revokes; + unsigned int revoke_blks = 0; + + *extra_revokes = 0; + if (revokes) { + revoke_blks = DIV_ROUND_UP(revokes, sdp->sd_inptrs); + *extra_revokes = revoke_blks * sdp->sd_inptrs - revokes; + blks += revoke_blks; + } __gfs2_log_reserve(sdp, blks, GFS2_LOG_FLUSH_MIN_BLOCKS); } @@ -588,9 +658,6 @@ static unsigned int calc_reserved(struct gfs2_sbd *sdp) blocks = tr->tr_num_databuf_new - tr->tr_num_databuf_rm; reserved += blocks + DIV_ROUND_UP(blocks, databuf_limit(sdp)); } - - if (sdp->sd_log_committed_revoke > 0) - reserved += gfs2_struct2blk(sdp, sdp->sd_log_committed_revoke) - 1; return reserved; } @@ -730,14 +797,9 @@ void gfs2_glock_remove_revoke(struct gfs2_glock *gl) void gfs2_flush_revokes(struct gfs2_sbd *sdp) { /* number of revokes we still have room for */ - unsigned int max_revokes; + unsigned int max_revokes = atomic_read(&sdp->sd_log_revokes_available); gfs2_log_lock(sdp); - max_revokes = sdp->sd_ldptrs; - if (sdp->sd_log_num_revoke > sdp->sd_ldptrs) - max_revokes += roundup(sdp->sd_log_num_revoke - sdp->sd_ldptrs, - sdp->sd_inptrs); - max_revokes -= sdp->sd_log_num_revoke; gfs2_ail1_empty(sdp, max_revokes); gfs2_log_unlock(sdp); } @@ -955,6 +1017,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags) unsigned int reserved_blocks = 0, used_blocks = 0; enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state); unsigned int first_log_head; + unsigned int reserved_revokes = 0; down_write(&sdp->sd_log_flush_lock); trace_gfs2_log_flush(sdp, 1, flags); @@ -979,13 +1042,15 @@ repeat: if (reserved_blocks) gfs2_log_release(sdp, reserved_blocks); reserved_blocks = sdp->sd_log_blks_reserved; + reserved_revokes = sdp->sd_log_num_revoke; if (tr) { sdp->sd_log_tr = NULL; tr->tr_first = first_log_head; - if (unlikely (state == SFS_FROZEN)) + if (unlikely (state == SFS_FROZEN)) { if (gfs2_assert_withdraw_delayed(sdp, !tr->tr_num_buf_new && !tr->tr_num_databuf_new)) goto out_withdraw; + } } } else if (!reserved_blocks) { unsigned int taboo_blocks = GFS2_LOG_FLUSH_MIN_BLOCKS; @@ -1000,17 +1065,15 @@ repeat: down_write(&sdp->sd_log_flush_lock); goto repeat; } + BUG_ON(sdp->sd_log_num_revoke); } if (flags & GFS2_LOG_HEAD_FLUSH_SHUTDOWN) clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); if (unlikely(state == SFS_FROZEN)) - if (gfs2_assert_withdraw_delayed(sdp, !sdp->sd_log_num_revoke)) + if (gfs2_assert_withdraw_delayed(sdp, !reserved_revokes)) goto out_withdraw; - if (gfs2_assert_withdraw_delayed(sdp, - sdp->sd_log_num_revoke == sdp->sd_log_committed_revoke)) - goto out_withdraw; gfs2_ordered_write(sdp); if (gfs2_withdrawn(sdp)) @@ -1034,7 +1097,6 @@ repeat: gfs2_log_lock(sdp); sdp->sd_log_blks_reserved = 0; - sdp->sd_log_committed_revoke = 0; spin_lock(&sdp->sd_ail_lock); if (tr && !list_empty(&tr->tr_ail1_list)) { @@ -1060,11 +1122,16 @@ repeat: out_end: used_blocks = log_distance(sdp, sdp->sd_log_flush_head, first_log_head); - if (gfs2_assert_withdraw_delayed(sdp, used_blocks <= reserved_blocks)) - goto out; + reserved_revokes += atomic_read(&sdp->sd_log_revokes_available); + atomic_set(&sdp->sd_log_revokes_available, sdp->sd_ldptrs); + gfs2_assert_withdraw(sdp, reserved_revokes % sdp->sd_inptrs == sdp->sd_ldptrs); + if (reserved_revokes > sdp->sd_ldptrs) + reserved_blocks += (reserved_revokes - sdp->sd_ldptrs) / sdp->sd_inptrs; out: - if (used_blocks != reserved_blocks) + if (used_blocks != reserved_blocks) { + gfs2_assert_withdraw_delayed(sdp, used_blocks < reserved_blocks); gfs2_log_release(sdp, reserved_blocks - used_blocks); + } up_write(&sdp->sd_log_flush_lock); gfs2_trans_free(sdp, tr); if (gfs2_withdrawing(sdp)) @@ -1105,8 +1172,8 @@ static void gfs2_merge_trans(struct gfs2_sbd *sdp, struct gfs2_trans *new) old->tr_num_databuf_new += new->tr_num_databuf_new; old->tr_num_buf_rm += new->tr_num_buf_rm; old->tr_num_databuf_rm += new->tr_num_databuf_rm; + old->tr_revokes += new->tr_revokes; old->tr_num_revoke += new->tr_num_revoke; - old->tr_num_revoke_rm += new->tr_num_revoke_rm; list_splice_tail_init(&new->tr_databuf, &old->tr_databuf); list_splice_tail_init(&new->tr_buf, &old->tr_buf); @@ -1133,7 +1200,6 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) set_bit(TR_ATTACHED, &tr->tr_flags); } - sdp->sd_log_committed_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm; reserved = calc_reserved(sdp); maxres = sdp->sd_log_blks_reserved + tr->tr_reserved; gfs2_assert_withdraw(sdp, maxres >= reserved); diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index e7f4a8d6be64..eea58015710e 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h @@ -74,8 +74,12 @@ extern void gfs2_ordered_del_inode(struct gfs2_inode *ip); extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct); extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd); extern bool gfs2_log_is_empty(struct gfs2_sbd *sdp); +extern void gfs2_log_release_revokes(struct gfs2_sbd *sdp, unsigned int revokes); extern void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks); -extern void gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks); +extern bool gfs2_log_try_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr, + unsigned int *extra_revokes); +extern void gfs2_log_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr, + unsigned int *extra_revokes); extern void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, u64 seq, u32 tail, u32 lblock, u32 flags, int op_flags); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 8658ebbcb4a9..dfe8537cb88e 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -857,7 +857,6 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) sdp->sd_log_num_revoke--; if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) { - gfs2_log_write_page(sdp, page); page = mempool_alloc(gfs2_page_pool, GFP_NOIO); mh = page_address(page); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 986dc2ebebf0..316ca5fc99e8 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -315,6 +315,13 @@ static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent) sizeof(struct gfs2_meta_header)) * GFS2_NBBY; /* not the rgrp bitmap, subsequent bitmaps only */ + /* + * We always keep at least one block reserved for revokes in + * transactions. This greatly simplifies allocating additional + * revoke blocks. + */ + atomic_set(&sdp->sd_log_revokes_available, sdp->sd_ldptrs); + /* Compute maximum reservation required to add a entry to a directory */ hash_blocks = DIV_ROUND_UP(sizeof(u64) * BIT(GFS2_DIR_MAX_DEPTH), diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 231ca1a41a73..ab96cf0bf26b 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -31,16 +31,18 @@ static void gfs2_print_trans(struct gfs2_sbd *sdp, const struct gfs2_trans *tr) fs_warn(sdp, "blocks=%u revokes=%u reserved=%u touched=%u\n", tr->tr_blocks, tr->tr_revokes, tr->tr_reserved, test_bit(TR_TOUCHED, &tr->tr_flags)); - fs_warn(sdp, "Buf %u/%u Databuf %u/%u Revoke %u/%u\n", + fs_warn(sdp, "Buf %u/%u Databuf %u/%u Revoke %u\n", tr->tr_num_buf_new, tr->tr_num_buf_rm, tr->tr_num_databuf_new, tr->tr_num_databuf_rm, - tr->tr_num_revoke, tr->tr_num_revoke_rm); + tr->tr_num_revoke); } int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp, unsigned int blocks, unsigned int revokes, unsigned long ip) { + unsigned int extra_revokes; + if (current->journal_info) { gfs2_print_trans(sdp, current->journal_info); BUG(); @@ -62,8 +64,6 @@ int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp, */ tr->tr_reserved += blocks + 1 + DIV_ROUND_UP(blocks - 1, databuf_limit(sdp)); } - if (revokes) - tr->tr_reserved += gfs2_struct2blk(sdp, revokes) - 1; INIT_LIST_HEAD(&tr->tr_databuf); INIT_LIST_HEAD(&tr->tr_buf); INIT_LIST_HEAD(&tr->tr_list); @@ -75,10 +75,26 @@ int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp, sb_start_intwrite(sdp->sd_vfs); - gfs2_log_reserve(sdp, tr->tr_reserved); + /* + * Try the reservations under sd_log_flush_lock to prevent log flushes + * from creating inconsistencies between the number of allocated and + * reserved revokes. If that fails, do a full-block allocation outside + * of the lock to avoid stalling log flushes. Then, allot the + * appropriate number of blocks to revokes, use as many revokes locally + * as needed, and "release" the surplus into the revokes pool. + */ down_read(&sdp->sd_log_flush_lock); + if (gfs2_log_try_reserve(sdp, tr, &extra_revokes)) + goto reserved; + up_read(&sdp->sd_log_flush_lock); + gfs2_log_reserve(sdp, tr, &extra_revokes); + down_read(&sdp->sd_log_flush_lock); + +reserved: + gfs2_log_release_revokes(sdp, extra_revokes); if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) { + gfs2_log_release_revokes(sdp, tr->tr_revokes); up_read(&sdp->sd_log_flush_lock); gfs2_log_release(sdp, tr->tr_reserved); sb_end_intwrite(sdp->sd_vfs); @@ -113,14 +129,17 @@ void gfs2_trans_end(struct gfs2_sbd *sdp) current->journal_info = NULL; if (!test_bit(TR_TOUCHED, &tr->tr_flags)) { - gfs2_log_release(sdp, tr->tr_reserved); + gfs2_log_release_revokes(sdp, tr->tr_revokes); up_read(&sdp->sd_log_flush_lock); + gfs2_log_release(sdp, tr->tr_reserved); if (!test_bit(TR_ONSTACK, &tr->tr_flags)) gfs2_trans_free(sdp, tr); sb_end_intwrite(sdp->sd_vfs); return; } + gfs2_log_release_revokes(sdp, tr->tr_revokes - tr->tr_num_revoke); + nbuf = tr->tr_num_buf_new + tr->tr_num_databuf_new; nbuf -= tr->tr_num_buf_rm; nbuf -= tr->tr_num_databuf_rm; @@ -278,7 +297,6 @@ void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) void gfs2_trans_remove_revoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len) { struct gfs2_bufdata *bd, *tmp; - struct gfs2_trans *tr = current->journal_info; unsigned int n = len; gfs2_log_lock(sdp); @@ -290,7 +308,7 @@ void gfs2_trans_remove_revoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len) if (bd->bd_gl) gfs2_glock_remove_revoke(bd->bd_gl); kmem_cache_free(gfs2_bufdata_cachep, bd); - tr->tr_num_revoke_rm++; + gfs2_log_release_revokes(sdp, 1); if (--n == 0) break; } -- cgit v1.2.3