[Cluster-devel] [PATCH 11/11] gfs2: Add local resource group locking

Andreas Gruenbacher agruenba at redhat.com
Fri Oct 5 19:18:54 UTC 2018


From: Bob Peterson <rpeterso at redhat.com>

Prepare for treating resource group glocks as exclusive among nodes but
shared among all tasks running on a node: introduce another layer of
node-specific locking that the local tasks can use to coordinate their
accesses.

This patch introduces only the local locking changes necessary so that
future patches can add resource group glock sharing.  We replace the
resource group spinlock with a mutex; whether that leads to noticeable
additional contention on the resource group mutex remains to be seen.
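
As an illustration, the two-level pattern this series works toward
looks roughly like the following sketch (hypothetical caller shown for
clarity; the actual entry points, rgrp_lock_local() and
rgrp_unlock_local(), are the ones added by this patch):

	/*
	 * Sketch only: rgd->rd_gl is the cluster-wide glock, held in
	 * EX by this node; rd_lock is the new node-local mutex that
	 * serializes the tasks running on this node.
	 */
	static void example_rgrp_update(struct gfs2_rgrpd *rgd)
	{
		rgrp_lock_local(rgd);	/* mutex_lock(&rgd->rd_lock) */
		/* ... modify bitmaps / reservations under rd_lock ... */
		rgrp_unlock_local(rgd);	/* mutex_unlock(&rgd->rd_lock) */
	}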

Signed-off-by: Andreas Gruenbacher <agruenba at redhat.com>
---
 fs/gfs2/incore.h |  3 +-
 fs/gfs2/lops.c   |  5 ++-
 fs/gfs2/rgrp.c   | 97 +++++++++++++++++++++++++++++++++++-------------
 fs/gfs2/rgrp.h   |  4 ++
 4 files changed, 81 insertions(+), 28 deletions(-)

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 932e63924f7e..2fa47b476eef 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -23,6 +23,7 @@
 #include <linux/percpu.h>
 #include <linux/lockref.h>
 #include <linux/rhashtable.h>
+#include <linux/mutex.h>
 
 #define DIO_WAIT	0x00000010
 #define DIO_METADATA	0x00000020
@@ -120,7 +121,7 @@ struct gfs2_rgrpd {
 #define GFS2_RDF_ERROR		0x40000000 /* error in rg */
 #define GFS2_RDF_PREFERRED	0x80000000 /* This rgrp is preferred */
 #define GFS2_RDF_MASK		0xf0000000 /* mask for internal flags */
-	spinlock_t rd_rsspin;           /* protects reservation related vars */
+	struct mutex rd_lock;
 	struct rb_root rd_rstree;       /* multi-block reservation tree */
 };
 
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 4c7069b8f3c1..a9e858e01c97 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -76,8 +76,9 @@ static void maybe_release_space(struct gfs2_bufdata *bd)
 	unsigned int index = bd->bd_bh->b_blocknr - gl->gl_name.ln_number;
 	struct gfs2_bitmap *bi = rgd->rd_bits + index;
 
+	rgrp_lock_local(rgd);
 	if (bi->bi_clone == NULL)
-		return;
+		goto out;
 	if (sdp->sd_args.ar_discard)
 		gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi, 1, NULL);
 	memcpy(bi->bi_clone + bi->bi_offset,
@@ -85,6 +86,8 @@ static void maybe_release_space(struct gfs2_bufdata *bd)
 	clear_bit(GBF_FULL, &bi->bi_flags);
 	rgd->rd_free_clone = rgd->rd_free;
 	rgd->rd_extfail_pt = rgd->rd_free;
+out:
+	rgrp_unlock_local(rgd);
 }
 
 /**
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 8a6b41f3667c..a89be4782c15 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -702,10 +702,10 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
 
 	rgd = rs->rs_rgd;
 	if (rgd) {
-		spin_lock(&rgd->rd_rsspin);
+		rgrp_lock_local(rgd);
 		__rs_deltree(rs);
 		BUG_ON(rs->rs_free);
-		spin_unlock(&rgd->rd_rsspin);
+		rgrp_unlock_local(rgd);
 	}
 }
 
@@ -737,12 +737,12 @@ static void return_all_reservations(struct gfs2_rgrpd *rgd)
 	struct rb_node *n;
 	struct gfs2_blkreserv *rs;
 
-	spin_lock(&rgd->rd_rsspin);
+	rgrp_lock_local(rgd);
 	while ((n = rb_first(&rgd->rd_rstree))) {
 		rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
 		__rs_deltree(rs);
 	}
-	spin_unlock(&rgd->rd_rsspin);
+	rgrp_unlock_local(rgd);
 }
 
 void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
@@ -948,7 +948,7 @@ static int read_rindex_entry(struct gfs2_inode *ip)
 	rgd->rd_data0 = be64_to_cpu(buf.ri_data0);
 	rgd->rd_data = be32_to_cpu(buf.ri_data);
 	rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes);
-	spin_lock_init(&rgd->rd_rsspin);
+	mutex_init(&rgd->rd_lock);
 
 	error = compute_bitstructs(rgd);
 	if (error)
@@ -1469,9 +1469,11 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
 			/* Trim each bitmap in the rgrp */
 			for (x = 0; x < rgd->rd_length; x++) {
 				struct gfs2_bitmap *bi = rgd->rd_bits + x;
+				rgrp_lock_local(rgd);
 				ret = gfs2_rgrp_send_discards(sdp,
 						rgd->rd_data0, NULL, bi, minlen,
 						&amt);
+				rgrp_unlock_local(rgd);
 				if (ret) {
 					gfs2_glock_dq_uninit(&gh);
 					goto out;
@@ -1483,9 +1485,11 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
 			ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0);
 			if (ret == 0) {
 				bh = rgd->rd_bits[0].bi_bh;
+				rgrp_lock_local(rgd);
 				rgd->rd_flags |= GFS2_RGF_TRIMMED;
 				gfs2_trans_add_meta(rgd->rd_gl, bh);
 				gfs2_rgrp_out(rgd, bh->b_data);
+				rgrp_unlock_local(rgd);
 				gfs2_trans_end(sdp);
 			}
 		}
@@ -1519,7 +1523,6 @@ static void rs_insert(struct gfs2_inode *ip)
 
 	BUG_ON(gfs2_rs_active(rs));
 
-	spin_lock(&rgd->rd_rsspin);
 	newn = &rgd->rd_rstree.rb_node;
 	while (*newn) {
 		struct gfs2_blkreserv *cur =
@@ -1532,7 +1535,6 @@ static void rs_insert(struct gfs2_inode *ip)
 		else if (rc < 0)
 			newn = &((*newn)->rb_left);
 		else {
-			spin_unlock(&rgd->rd_rsspin);
 			WARN_ON(1);
 			return;
 		}
@@ -1540,7 +1542,6 @@ static void rs_insert(struct gfs2_inode *ip)
 
 	rb_link_node(&rs->rs_node, parent, newn);
 	rb_insert_color(&rs->rs_node, &rgd->rd_rstree);
-	spin_unlock(&rgd->rd_rsspin);
 	trace_gfs2_rs(rs, TRACE_RS_INSERT);
 }
 
@@ -1632,7 +1633,6 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
 	struct rb_node *n;
 	int rc;
 
-	spin_lock(&rgd->rd_rsspin);
 	n = rgd->rd_rstree.rb_node;
 	while (n) {
 		rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
@@ -1655,7 +1655,6 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
 		}
 	}
 
-	spin_unlock(&rgd->rd_rsspin);
 	return block;
 }
 
@@ -1857,7 +1856,22 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
 	struct gfs2_rbm rbm = { .rgd = rgd, .bii = 0, .offset = 0 };
 
 	while (1) {
+		/*
+		 * We must be careful to avoid deadlock here:
+		 * All transactions expect sd_log_flush_lock followed by rgrp
+		 * ex (if needed), but try_rgrp_unlink takes sd_log_flush_lock
+		 * outside a transaction and therefore must not have the rgrp
+		 * ex already held. To avoid deadlock, we drop the rgrp ex lock
+		 * before taking the log_flush_lock, then reacquire it to
+		 * protect our call to gfs2_rbm_find.
+		 *
+		 * Also note that rgrp_unlock_local must come AFTER the caller does
+		 * gfs2_rs_deltree because rgrp ex needs to be held before
+		 * making reservations.
+		 */
+		rgrp_unlock_local(rgd);
 		down_write(&sdp->sd_log_flush_lock);
+		rgrp_lock_local(rgd);
 		error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, NULL,
 				      true);
 		up_write(&sdp->sd_log_flush_lock);
@@ -2055,7 +2069,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct gfs2_rgrpd *begin = NULL;
 	struct gfs2_blkreserv *rs = &ip->i_res;
-	int error = 0, rg_locked, flags = 0;
+	int error = 0, flags = 0;
+	bool rg_locked;
 	u64 last_unlinked = NO_BLOCK;
 	int loops = 0;
 	u32 free_blocks, skip = 0;
@@ -2081,10 +2096,10 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
 		return -EBADSLT;
 
 	while (loops < 3) {
-		rg_locked = 1;
-
-		if (!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl)) {
-			rg_locked = 0;
+		rg_locked = gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl);
+		if (rg_locked) {
+			rgrp_lock_local(rs->rs_rgd);
+		} else {
 			if (skip && skip--)
 				goto next_rgrp;
 			if (!gfs2_rs_active(rs)) {
@@ -2101,12 +2116,14 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
 						   &ip->i_rgd_gh);
 			if (unlikely(error))
 				return error;
+			rgrp_lock_local(rs->rs_rgd);
 			if (!gfs2_rs_active(rs) && (loops < 2) &&
 			    gfs2_rgrp_congested(rs->rs_rgd, loops))
 				goto skip_rgrp;
 			if (sdp->sd_args.ar_rgrplvb) {
 				error = update_rgrp_lvb(rs->rs_rgd);
 				if (unlikely(error)) {
+					rgrp_unlock_local(rs->rs_rgd);
 					gfs2_glock_dq_uninit(&ip->i_rgd_gh);
 					return error;
 				}
@@ -2140,9 +2157,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
 			rs->rs_reserved = ap->target;
 			if (rs->rs_reserved > free_blocks)
 				rs->rs_reserved = free_blocks;
-			spin_lock(&rs->rs_rgd->rd_rsspin);
 			rgd->rd_reserved += rs->rs_reserved;
-			spin_unlock(&rs->rs_rgd->rd_rsspin);
+			rgrp_unlock_local(rs->rs_rgd);
 			return 0;
 		}
 check_rgrp:
@@ -2151,6 +2167,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
 			try_rgrp_unlink(rs->rs_rgd, &last_unlinked,
 					ip->i_no_addr);
 skip_rgrp:
+		rgrp_unlock_local(rs->rs_rgd);
+
 		/* Drop reservation, if we couldn't use reserved rgrp */
 		if (gfs2_rs_active(rs))
 			gfs2_rs_deltree(rs);
@@ -2199,10 +2217,10 @@ void gfs2_inplace_release(struct gfs2_inode *ip)
 	if (rs->rs_reserved) {
 		struct gfs2_rgrpd *rgd = rs->rs_rgd;
 
-		spin_lock(&rgd->rd_rsspin);
+		rgrp_lock_local(rgd);
 		BUG_ON(rgd->rd_reserved < rs->rs_reserved);
 		rgd->rd_reserved -= rs->rs_reserved;
-		spin_unlock(&rs->rs_rgd->rd_rsspin);
+		rgrp_unlock_local(rgd);
 		rs->rs_reserved = 0;
 	}
 	if (gfs2_holder_initialized(&ip->i_rgd_gh))
@@ -2304,12 +2322,12 @@ void gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl)
 			       be32_to_cpu(rgl->rl_free),
 			       be32_to_cpu(rgl->rl_dinodes));
 	}
-	spin_lock(&rgd->rd_rsspin);
+	rgrp_lock_local(rgd);
 	for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) {
 		trs = rb_entry(n, struct gfs2_blkreserv, rs_node);
 		dump_rs(seq, trs);
 	}
-	spin_unlock(&rgd->rd_rsspin);
+	rgrp_unlock_local(rgd);
 }
 
 static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
@@ -2339,7 +2357,6 @@ static void gfs2_adjust_reservation(struct gfs2_inode *ip,
 	struct gfs2_blkreserv *rs = &ip->i_res;
 	struct gfs2_rgrpd *rgd = rbm->rgd;
 
-	spin_lock(&rgd->rd_rsspin);
 	BUG_ON(rs->rs_reserved < len);
 	rgd->rd_reserved -= len;
 	rs->rs_reserved -= len;
@@ -2355,15 +2372,13 @@ static void gfs2_adjust_reservation(struct gfs2_inode *ip,
 			trace_gfs2_rs(rs, TRACE_RS_CLAIM);
 			if (rs->rs_start < rgd->rd_data0 + rgd->rd_data &&
 			    rs->rs_free)
-				goto out;
+				return;
 			/* We used up our block reservation, so we should
 			   reserve more blocks next time. */
 			atomic_add(RGRP_RSRV_ADDBLKS, &ip->i_sizehint);
 		}
 		__rs_deltree(rs);
 	}
-out:
-	spin_unlock(&rgd->rd_rsspin);
 }
 
 /**
@@ -2416,6 +2431,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
 
 	BUG_ON(ip->i_res.rs_reserved < *nblocks);
 
+	rgrp_lock_local(rbm.rgd);
 	gfs2_set_alloc_start(&rbm, ip, dinode);
 	error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, ip, false);
 
@@ -2460,6 +2476,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
 	}
 
 	rbm.rgd->rd_free -= *nblocks;
+	rbm.rgd->rd_free_clone -= *nblocks;
 	if (dinode) {
 		rbm.rgd->rd_dinodes++;
 		*generation = rbm.rgd->rd_igeneration++;
@@ -2469,6 +2486,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
 
 	gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh);
 	gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data);
+	rgrp_unlock_local(rbm.rgd);
 
 	gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0);
 	if (dinode)
@@ -2476,13 +2494,13 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
 
 	gfs2_quota_change(ip, *nblocks, ip->i_inode.i_uid, ip->i_inode.i_gid);
 
-	rbm.rgd->rd_free_clone -= *nblocks;
 	trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks,
 			       dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
 	*bn = block;
 	return 0;
 
 rgrp_error:
+	rgrp_unlock_local(rbm.rgd);
 	gfs2_rgrp_error(rbm.rgd);
 	return -EIO;
 }
@@ -2502,12 +2520,14 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 
+	rgrp_lock_local(rgd);
 	rgblk_free(sdp, rgd, bstart, blen, GFS2_BLKST_FREE);
 	trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE);
 	rgd->rd_free += blen;
 	rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
 	gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
+	rgrp_unlock_local(rgd);
 
 	/* Directories keep their data in the metadata address space */
 	if (meta || ip->i_depth)
@@ -2543,17 +2563,20 @@ void gfs2_unlink_di(struct inode *inode)
 	rgd = gfs2_blk2rgrpd(sdp, blkno, true);
 	if (!rgd)
 		return;
+	rgrp_lock_local(rgd);
 	rgblk_free(sdp, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
 	trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
 	gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
 	be32_add_cpu(&rgd->rd_rgl->rl_unlinked, 1);
+	rgrp_unlock_local(rgd);
 }
 
 void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
 {
 	struct gfs2_sbd *sdp = rgd->rd_sbd;
 
+	rgrp_lock_local(rgd);
 	rgblk_free(sdp, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
 	if (!rgd->rd_dinodes)
 		gfs2_consist_rgrpd(rgd);
@@ -2562,6 +2585,7 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
 
 	gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
 	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
+	rgrp_unlock_local(rgd);
 	be32_add_cpu(&rgd->rd_rgl->rl_unlinked, -1);
 
 	gfs2_statfs_change(sdp, 0, +1, -1);
@@ -2576,6 +2600,10 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
  * @no_addr: The block number to check
  * @type: The block type we are looking for
  *
+ * The inode glock of @no_addr must be held.  The @type to check for is either
+ * GFS2_BLKST_DINODE or GFS2_BLKST_UNLINKED; checking for type GFS2_BLKST_FREE
+ * or GFS2_BLKST_USED would make no sense.
+ *
  * Returns: 0 if the block type matches the expected type
  *          -ESTALE if it doesn't match
  *          or -ve errno if something went wrong while checking
@@ -2601,6 +2629,12 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
 	if (WARN_ON_ONCE(error))
 		goto fail;
 
+	/*
+	 * No need to take the local resource group lock here; the inode glock
+	 * of @no_addr provides the necessary synchronization in case the block
+	 * is an inode.  (In case the block is not an inode, the block type
+	 * will not match the @type we are looking for.)
+	 */
 	if (gfs2_testbit(&rbm, false) != type)
 		error = -ESTALE;
 
@@ -2723,3 +2757,14 @@ void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
 	}
 }
 
+void rgrp_lock_local(struct gfs2_rgrpd *rgd)
+{
+	BUG_ON(!gfs2_glock_is_held_excl(rgd->rd_gl) &&
+	       !test_bit(SDF_NORECOVERY, &rgd->rd_sbd->sd_flags));
+	mutex_lock(&rgd->rd_lock);
+}
+
+void rgrp_unlock_local(struct gfs2_rgrpd *rgd)
+{
+	mutex_unlock(&rgd->rd_lock);
+}
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index b596c3d17988..33e52dab76ef 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -92,4 +92,8 @@ static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
 }
 
 extern void check_and_update_goal(struct gfs2_inode *ip);
+
+extern void rgrp_lock_local(struct gfs2_rgrpd *rgd);
+extern void rgrp_unlock_local(struct gfs2_rgrpd *rgd);
+
 #endif /* __RGRP_DOT_H__ */
-- 
2.17.1