[Cluster-devel] fsck: Clean up pass1 inode iteration code

Steven Whitehouse swhiteho at redhat.com
Fri Feb 22 12:08:03 UTC 2013


The original version of this patch, including readahead for the
inodes only appears to deliver a performance improvement in some
particular cases, and slows down other cases. So this patch is
fairly similar in that it includes the clean ups from the previous
attempt, however the readahead has been left out for the time
being until the problem is better understood, and can be fixed.

This clean up still has the advantage of better code structure
around the main loop, and in addition it also reduced the number
of read requests which are generated too.

Signed-off-by: Steven Whitehouse <swhiteho at redhat.com>

diff --git a/gfs2/fsck/pass1.c b/gfs2/fsck/pass1.c
index 540f2a9..9a34e97 100644
--- a/gfs2/fsck/pass1.c
+++ b/gfs2/fsck/pass1.c
@@ -14,6 +14,7 @@
 #include <unistd.h>
 #include <string.h>
 #include <time.h>
+#include <fcntl.h>
 #include <sys/ioctl.h>
 #include <inttypes.h>
 #include <libintl.h>
@@ -1547,6 +1548,128 @@ static int check_system_inodes(struct gfs2_sbd *sdp)
 	return 0;
 }
 
+static int pass1_process_bitmap(struct gfs2_sbd *sdp, struct rgrp_tree *rgd, uint64_t *ibuf, unsigned n)
+{
+	struct gfs2_buffer_head *bh;
+	unsigned i;
+	uint64_t block;
+
+	for (i = 0; i < n; i++) {
+		block = ibuf[i];
+
+		/* skip gfs1 rindex indirect blocks */
+		if (sdp->gfs1 && blockfind(&gfs1_rindex_blks, block)) {
+			log_debug(_("Skipping rindex indir block "
+				    "%lld (0x%llx)\n"),
+				  (unsigned long long)block,
+				  (unsigned long long)block);
+			continue;
+		}
+		warm_fuzzy_stuff(block);
+
+		if (fsck_abort) { /* if asked to abort */
+			gfs2_special_free(&gfs1_rindex_blks);
+			return FSCK_OK;
+		}
+		if (skip_this_pass) {
+			printf( _("Skipping pass 1 is not a good idea.\n"));
+			skip_this_pass = FALSE;
+			fflush(stdout);
+		}
+		if (fsck_system_inode(sdp, block)) {
+			log_debug(_("Already processed system inode "
+				    "%lld (0x%llx)\n"),
+				  (unsigned long long)block,
+				  (unsigned long long)block);
+			continue;
+		}
+		bh = bread(sdp, block);
+
+		if (gfs2_check_meta(bh, GFS2_METATYPE_DI)) {
+			/* In gfs2, a bitmap mark of 2 means an inode,
+			   but in gfs1 it means any metadata.  So if
+			   this is gfs1 and not an inode, it may be
+			   okay.  If it's non-dinode metadata, it will
+			   be referenced by an inode, so we need to
+			   skip it here and it will be sorted out
+			   when the referencing inode is checked. */
+			if (sdp->gfs1) {
+				uint32_t check_magic;
+
+				check_magic = ((struct gfs2_meta_header *)
+					       (bh->b_data))->mh_magic;
+				if (be32_to_cpu(check_magic) == GFS2_MAGIC) {
+					log_debug( _("Deferring GFS1 "
+						     "metadata block #"
+						     "%" PRIu64" (0x%"
+						     PRIx64 ")\n"),
+						   block, block);
+					brelse(bh);
+					continue;
+				}
+			}
+			log_err( _("Found invalid inode at block #"
+				   "%llu (0x%llx)\n"),
+				 (unsigned long long)block,
+				 (unsigned long long)block);
+			if (gfs2_blockmap_set(bl, block, gfs2_block_free)) {
+				stack;
+				brelse(bh);
+				gfs2_special_free(&gfs1_rindex_blks);
+				return FSCK_ERROR;
+			}
+			check_n_fix_bitmap(sdp, block, gfs2_block_free);
+		} else if (handle_di(sdp, bh) < 0) {
+			stack;
+			brelse(bh);
+			gfs2_special_free(&gfs1_rindex_blks);
+			return FSCK_ERROR;
+		}
+		/* Ignore everything else - they should be hit by the
+		   handle_di step.  Don't check NONE either, because
+		   check_meta passes everything if GFS2_METATYPE_NONE
+		   is specified.  Hopefully, other metadata types such
+		   as indirect blocks will be handled when the inode
+		   itself is processed, and if it's not, it should be
+		   caught in pass5. */
+		brelse(bh);
+	}
+
+	return 0;
+}
+
+static int pass1_process_rgrp(struct gfs2_sbd *sdp, struct rgrp_tree *rgd)
+{
+	unsigned k, n, i;
+	uint64_t *ibuf = malloc(sdp->bsize * GFS2_NBBY * sizeof(uint64_t));
+	int ret;
+
+	for (k = 0; k < rgd->ri.ri_length; k++) {
+		n = lgfs2_bm_scan(rgd, k, ibuf, GFS2_BLKST_DINODE);
+
+		if (n) {
+			ret = pass1_process_bitmap(sdp, rgd, ibuf, n);
+			if (ret)
+				return ret;
+		}
+
+		/*
+		  For GFS1, we have to count the "free meta" blocks in the
+		  resource group and mark them specially so we can count them
+		  properly in pass5.
+		 */
+		if (!sdp->gfs1)
+			continue;
+
+		n = lgfs2_bm_scan(rgd, k, ibuf, GFS2_BLKST_UNLINKED);
+		for (i = 0; i < n; i++)
+			gfs2_blockmap_set(bl, ibuf[i], gfs2_freemeta);
+	}
+
+	free(ibuf);
+	return 0;
+}
+
 /**
  * pass1 - walk through inodes and check inode state
  *
@@ -1563,12 +1686,10 @@ static int check_system_inodes(struct gfs2_sbd *sdp)
 int pass1(struct gfs2_sbd *sdp)
 {
 	struct osi_node *n, *next = NULL;
-	struct gfs2_buffer_head *bh;
-	uint64_t block = 0;
 	struct rgrp_tree *rgd;
-	int first;
 	uint64_t i;
 	uint64_t rg_count = 0;
+	int ret;
 
 	osi_list_init(&gfs1_rindex_blks.list);
 
@@ -1611,115 +1732,10 @@ int pass1(struct gfs2_sbd *sdp)
 			gfs2_meta_rgrp);*/
 		}
 
-		first = 1;
+		ret = pass1_process_rgrp(sdp, rgd);
+		if (ret)
+			return ret;
 
-		while (1) {
-			/* "block" is relative to the entire file system */
-			/* Get the next dinode in the file system, according
-			   to the bitmap.  This should ONLY be dinodes unless
-			   it's GFS1, in which case it can be any metadata. */
-			if (gfs2_next_rg_meta(rgd, &block, first))
-				break;
-			/* skip gfs1 rindex indirect blocks */
-			if (sdp->gfs1 && blockfind(&gfs1_rindex_blks, block)) {
-				log_debug(_("Skipping rindex indir block "
-					    "%lld (0x%llx)\n"),
-					  (unsigned long long)block,
-					  (unsigned long long)block);
-				first = 0;
-				continue;
-			}
-			warm_fuzzy_stuff(block);
-
-			if (fsck_abort) { /* if asked to abort */
-				gfs2_special_free(&gfs1_rindex_blks);
-				return FSCK_OK;
-			}
-			if (skip_this_pass) {
-				printf( _("Skipping pass 1 is not a good idea.\n"));
-				skip_this_pass = FALSE;
-				fflush(stdout);
-			}
-			if (fsck_system_inode(sdp, block)) {
-				log_debug(_("Already processed system inode "
-					    "%lld (0x%llx)\n"),
-					  (unsigned long long)block,
-					  (unsigned long long)block);
-				first = 0;
-				continue;
-			}
-			bh = bread(sdp, block);
-
-			/*log_debug( _("Checking metadata block #%" PRIu64
-			  " (0x%" PRIx64 ")\n"), block, block);*/
-
-			if (gfs2_check_meta(bh, GFS2_METATYPE_DI)) {
-				/* In gfs2, a bitmap mark of 2 means an inode,
-				   but in gfs1 it means any metadata.  So if
-				   this is gfs1 and not an inode, it may be
-				   okay.  If it's non-dinode metadata, it will
-				   be referenced by an inode, so we need to
-				   skip it here and it will be sorted out
-				   when the referencing inode is checked. */
-				if (sdp->gfs1) {
-					uint32_t check_magic;
-
-					check_magic = ((struct
-							gfs2_meta_header *)
-						       (bh->b_data))->mh_magic;
-					if (be32_to_cpu(check_magic) ==
-					    GFS2_MAGIC) {
-						log_debug( _("Deferring GFS1 "
-							     "metadata block #"
-							     "%" PRIu64" (0x%"
-							     PRIx64 ")\n"),
-							   block, block);
-						brelse(bh);
-						first = 0;
-						continue;
-					}
-				}
-				log_err( _("Found invalid inode at block #"
-					   "%llu (0x%llx)\n"),
-					 (unsigned long long)block,
-					 (unsigned long long)block);
-				if (gfs2_blockmap_set(bl, block,
-						      gfs2_block_free)) {
-					stack;
-					brelse(bh);
-					gfs2_special_free(&gfs1_rindex_blks);
-					return FSCK_ERROR;
-				}
-				check_n_fix_bitmap(sdp, block,
-						   gfs2_block_free);
-			} else if (handle_di(sdp, bh) < 0) {
-				stack;
-				brelse(bh);
-				gfs2_special_free(&gfs1_rindex_blks);
-				return FSCK_ERROR;
-			}
-			/* Ignore everything else - they should be hit by the
-			   handle_di step.  Don't check NONE either, because
-			   check_meta passes everything if GFS2_METATYPE_NONE
-			   is specified.  Hopefully, other metadata types such
-			   as indirect blocks will be handled when the inode
-			   itself is processed, and if it's not, it should be
-			   caught in pass5. */
-			brelse(bh);
-			first = 0;
-		}
-		/*
-		  For GFS1, we have to count the "free meta" blocks in the
-		  resource group and mark them specially so we can count them
-		  properly in pass5.
-		 */
-		if (!sdp->gfs1)
-			continue;
-		first = 1;
-		while (gfs2_next_rg_freemeta(rgd, &block, first) == 0) {
-			gfs2_blockmap_set(bl, block, gfs2_freemeta);
-			first = 0;
-		}
 	}
 	gfs2_special_free(&gfs1_rindex_blks);
 	return FSCK_OK;
diff --git a/gfs2/libgfs2/libgfs2.h b/gfs2/libgfs2/libgfs2.h
index 46d4d67..db31a6c 100644
--- a/gfs2/libgfs2/libgfs2.h
+++ b/gfs2/libgfs2/libgfs2.h
@@ -757,12 +757,12 @@ extern int build_root(struct gfs2_sbd *sdp);
 extern int do_init_inum(struct gfs2_sbd *sdp);
 extern int do_init_statfs(struct gfs2_sbd *sdp);
 extern int gfs2_check_meta(struct gfs2_buffer_head *bh, int type);
+extern unsigned lgfs2_bm_scan(struct rgrp_tree *rgd, unsigned idx,
+			      uint64_t *buf, uint8_t state);
 extern int gfs2_next_rg_meta(struct rgrp_tree *rgd, uint64_t *block,
 			     int first);
 extern int gfs2_next_rg_metatype(struct gfs2_sbd *sdp, struct rgrp_tree *rgd,
 				 uint64_t *block, uint32_t type, int first);
-extern int gfs2_next_rg_freemeta(struct rgrp_tree *rgd, uint64_t *block,
-				 int first);
 
 /* super.c */
 extern int check_sb(struct gfs2_sb *sb);
diff --git a/gfs2/libgfs2/structures.c b/gfs2/libgfs2/structures.c
index 645c45a..f9231ca 100644
--- a/gfs2/libgfs2/structures.c
+++ b/gfs2/libgfs2/structures.c
@@ -495,6 +495,24 @@ int gfs2_check_meta(struct gfs2_buffer_head *bh, int type)
 	return 0;
 }
 
+unsigned lgfs2_bm_scan(struct rgrp_tree *rgd, unsigned idx, uint64_t *buf, uint8_t state)
+{
+	struct gfs2_bitmap *bi = &rgd->bits[idx];
+	unsigned n = 0;
+	uint32_t blk = 0;
+
+	while(blk < (bi->bi_len * GFS2_NBBY)) {
+		blk = gfs2_bitfit((const unsigned char *)rgd->bh[idx]->b_data + bi->bi_offset,
+				  bi->bi_len, blk, state);
+		if (blk == BFITNOENT)
+			break;
+		buf[n++] = blk + (bi->bi_start * GFS2_NBBY) + rgd->ri.ri_data0;
+		blk++;
+	}
+
+	return n;
+}
+
 /**
  * gfs2_next_rg_meta
  * @rgd:
@@ -545,11 +563,6 @@ int gfs2_next_rg_meta(struct rgrp_tree *rgd, uint64_t *block, int first)
 	return __gfs2_next_rg_meta(rgd, block, first, GFS2_BLKST_DINODE);
 }
 
-int gfs2_next_rg_freemeta(struct rgrp_tree *rgd, uint64_t *block, int first)
-{
-	return __gfs2_next_rg_meta(rgd, block, first, GFS2_BLKST_UNLINKED);
-}
-
 /**
  * next_rg_metatype
  * @rgd:





More information about the Cluster-devel mailing list