[Cluster-devel] [GFS2 PATCH] GFS2: Break ordered_write list by rgrp for faster sorting

Bob Peterson rpeterso at redhat.com
Thu Aug 9 19:18:16 UTC 2012


Hi,

This patch moves the ordered_write buffer list from the superblock
to the resource groups (rgrps). That yields several smaller lists,
which are therefore faster to search and sort.

Regards,

Bob Peterson
Red Hat File Systems

Signed-off-by: Bob Peterson <rpeterso at redhat.com> 
---
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 01c4975..3708e39 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -1083,8 +1083,10 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
 			bh->b_private = NULL;
 		}
 		gfs2_log_unlock(sdp);
-		if (bd)
+		if (bd) {
+			BUG_ON(!list_empty(&bd->bd_list));
 			kmem_cache_free(gfs2_bufdata_cachep, bd);
+		}
 
 		bh = bh->b_this_page;
 	} while (bh != head);
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 99d7c64..d380152 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -97,6 +97,8 @@ struct gfs2_rgrpd {
 #define GFS2_RDF_UPTODATE	0x20000000 /* rg is up to date */
 #define GFS2_RDF_ERROR		0x40000000 /* error in rg */
 #define GFS2_RDF_MASK		0xf0000000 /* mask for internal flags */
+	struct list_head rd_log_le_ordered;
+	struct list_head rd_log_le_writing;
 	spinlock_t rd_rsspin;           /* protects reservation related vars */
 	struct rb_root rd_rstree;       /* multi-block reservation tree */
 	u32 rd_rs_cnt;                  /* count of current reservations */
@@ -723,7 +725,6 @@ struct gfs2_sbd {
 	struct list_head sd_log_le_buf;
 	struct list_head sd_log_le_revoke;
 	struct list_head sd_log_le_databuf;
-	struct list_head sd_log_le_ordered;
 
 	atomic_t sd_log_thresh1;
 	atomic_t sd_log_thresh2;
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index f4beeb9..ce3e86d 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -28,6 +28,7 @@
 #include "log.h"
 #include "lops.h"
 #include "meta_io.h"
+#include "rgrp.h"
 #include "util.h"
 #include "dir.h"
 #include "trace_gfs2.h"
@@ -500,29 +501,38 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp)
 {
 	struct gfs2_bufdata *bd;
 	struct buffer_head *bh;
-	LIST_HEAD(written);
+	struct gfs2_rgrpd *rgd, *rg1;
 
 	gfs2_log_lock(sdp);
-	list_sort(NULL, &sdp->sd_log_le_ordered, &bd_cmp);
-	while (!list_empty(&sdp->sd_log_le_ordered)) {
-		bd = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_bufdata, bd_list);
-		list_move(&bd->bd_list, &written);
-		bh = bd->bd_bh;
-		if (!buffer_dirty(bh))
-			continue;
-		get_bh(bh);
-		gfs2_log_unlock(sdp);
-		lock_buffer(bh);
-		if (buffer_mapped(bh) && test_clear_buffer_dirty(bh)) {
-			bh->b_end_io = end_buffer_write_sync;
-			submit_bh(WRITE_SYNC, bh);
-		} else {
-			unlock_buffer(bh);
-			brelse(bh);
+	rgd = rg1 = gfs2_rgrpd_get_first(sdp);
+	while (rgd) {
+		if (!list_empty(&rgd->rd_log_le_ordered) &&
+		    !list_is_last(rgd->rd_log_le_ordered.next,
+				  &rgd->rd_log_le_ordered))
+			list_sort(NULL, &rgd->rd_log_le_ordered, &bd_cmp);
+		while (!list_empty(&rgd->rd_log_le_ordered)) {
+			bd = list_entry(rgd->rd_log_le_ordered.next,
+					struct gfs2_bufdata, bd_list);
+			list_move(&bd->bd_list, &rgd->rd_log_le_writing);
+			bh = bd->bd_bh;
+			if (!buffer_dirty(bh))
+				continue;
+			get_bh(bh);
+			gfs2_log_unlock(sdp);
+			lock_buffer(bh);
+			if (buffer_mapped(bh) && test_clear_buffer_dirty(bh)) {
+				bh->b_end_io = end_buffer_write_sync;
+				submit_bh(WRITE_SYNC, bh);
+			} else {
+				unlock_buffer(bh);
+				brelse(bh);
+			}
+			gfs2_log_lock(sdp);
 		}
-		gfs2_log_lock(sdp);
+		rgd = gfs2_rgrpd_get_next(rgd);
+		if (rgd == rg1)
+			break;
 	}
-	list_splice(&written, &sdp->sd_log_le_ordered);
 	gfs2_log_unlock(sdp);
 }
 
@@ -530,20 +540,28 @@ static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
 {
 	struct gfs2_bufdata *bd;
 	struct buffer_head *bh;
+	struct gfs2_rgrpd *rgd, *rg1;
 
 	gfs2_log_lock(sdp);
-	while (!list_empty(&sdp->sd_log_le_ordered)) {
-		bd = list_entry(sdp->sd_log_le_ordered.prev, struct gfs2_bufdata, bd_list);
-		bh = bd->bd_bh;
-		if (buffer_locked(bh)) {
-			get_bh(bh);
-			gfs2_log_unlock(sdp);
-			wait_on_buffer(bh);
-			brelse(bh);
-			gfs2_log_lock(sdp);
-			continue;
+	rgd = rg1 = gfs2_rgrpd_get_first(sdp);
+	while (rgd) {
+		while (!list_empty(&rgd->rd_log_le_writing)) {
+			bd = list_entry(rgd->rd_log_le_writing.prev,
+					struct gfs2_bufdata, bd_list);
+			list_del_init(&bd->bd_list);
+			bh = bd->bd_bh;
+			if (bh && buffer_locked(bh)) {
+				get_bh(bh);
+				gfs2_log_unlock(sdp);
+				wait_on_buffer(bh);
+				brelse(bh);
+				gfs2_log_lock(sdp);
+				continue;
+			}
 		}
-		list_del_init(&bd->bd_list);
+		rgd = gfs2_rgrpd_get_next(rgd);
+		if (rgd == rg1)
+			break;
 	}
 	gfs2_log_unlock(sdp);
 }
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 8ff95a2..39c483e 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -672,6 +672,7 @@ static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
 		gl = bd->bd_gl;
 		atomic_dec(&gl->gl_revokes);
 		clear_bit(GLF_LFLUSH, &gl->gl_flags);
+		BUG_ON(!list_empty(&bd->bd_list));
 		kmem_cache_free(gfs2_bufdata_cachep, bd);
 	}
 }
@@ -776,6 +777,7 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
 	struct gfs2_trans *tr = current->journal_info;
 	struct address_space *mapping = bd->bd_bh->b_page->mapping;
 	struct gfs2_inode *ip = GFS2_I(mapping->host);
+	struct gfs2_rgrpd *rgd;
 
 	lock_buffer(bd->bd_bh);
 	gfs2_log_lock(sdp);
@@ -791,7 +793,13 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
 		sdp->sd_log_num_databuf++;
 		list_add_tail(&bd->bd_list, &sdp->sd_log_le_databuf);
 	} else {
-		list_add_tail(&bd->bd_list, &sdp->sd_log_le_ordered);
+		if (ip->i_rgd &&
+		    rgrp_contains_block(ip->i_rgd, bd->bd_bh->b_blocknr))
+			rgd = ip->i_rgd;
+		else
+			rgd = gfs2_blk2rgrpd(sdp, bd->bd_bh->b_blocknr, 1);
+		BUG_ON(rgd == NULL);
+		list_add_tail(&bd->bd_list, &rgd->rd_log_le_ordered);
 	}
 out:
 	gfs2_log_unlock(sdp);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index e5af9dc..6b72d07 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -100,7 +100,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
 	INIT_LIST_HEAD(&sdp->sd_log_le_buf);
 	INIT_LIST_HEAD(&sdp->sd_log_le_revoke);
 	INIT_LIST_HEAD(&sdp->sd_log_le_databuf);
-	INIT_LIST_HEAD(&sdp->sd_log_le_ordered);
 
 	init_waitqueue_head(&sdp->sd_log_waitq);
 	init_waitqueue_head(&sdp->sd_logd_waitq);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 47d2346..f9b2baf 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -331,13 +331,6 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
 	}
 }
 
-static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
-{
-	u64 first = rgd->rd_data0;
-	u64 last = first + rgd->rd_data;
-	return first <= block && block < last;
-}
-
 /**
  * gfs2_blk2rgrpd - Find resource group for a given data/meta block number
  * @sdp: The GFS2 superblock
@@ -754,6 +747,8 @@ static int read_rindex_entry(struct gfs2_inode *ip)
 	rgd->rd_data0 = be64_to_cpu(buf.ri_data0);
 	rgd->rd_data = be32_to_cpu(buf.ri_data);
 	rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes);
+	INIT_LIST_HEAD(&rgd->rd_log_le_ordered);
+	INIT_LIST_HEAD(&rgd->rd_log_le_writing);
 	spin_lock_init(&rgd->rd_rsspin);
 
 	error = compute_bitstructs(rgd);
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index c98f6af..a379320 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -73,6 +73,13 @@ extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
 				   const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed);
 extern int gfs2_fitrim(struct file *filp, void __user *argp);
 
+static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
+{
+	u64 first = rgd->rd_data0;
+	u64 last = first + rgd->rd_data;
+	return first <= block && block < last;
+}
+
 /* This is how to tell if a reservation is in the rgrp tree: */
 static inline bool gfs2_rs_active(struct gfs2_blkreserv *rs)
 {
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index adbd278..724f724 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -186,6 +186,7 @@ void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len)
 			list_del_init(&bd->bd_list);
 			gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke);
 			sdp->sd_log_num_revoke--;
+			BUG_ON(!list_empty(&bd->bd_list));
 			kmem_cache_free(gfs2_bufdata_cachep, bd);
 			tr->tr_num_revoke_rm++;
 			if (--n == 0)




More information about the Cluster-devel mailing list