[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

[Cluster-devel] fsck: Add readahead to inodes in pass1



Pass 1 in fsck reads all the inodes, in disk block order based upon the
rgrp bitmap. This patch adds a function to create a list of blocks of a
certain type from the rgrp bitmap. Using this list it is then possible
to do readahead on the inode blocks.

This gives me a gain of around 25% improvement in speed of overall fsck
time for my 500G test filesystem.

It is also a nice cleanup, since it splits the pass1 function into more
easily understood components.

Signed-off-by: Steven Whitehouse <swhiteho redhat com>

diff --git a/gfs2/fsck/pass1.c b/gfs2/fsck/pass1.c
index 540f2a9..4c67a83 100644
--- a/gfs2/fsck/pass1.c
+++ b/gfs2/fsck/pass1.c
@@ -14,6 +14,7 @@
 #include <unistd.h>
 #include <string.h>
 #include <time.h>
+#include <fcntl.h>
 #include <sys/ioctl.h>
 #include <inttypes.h>
 #include <libintl.h>
@@ -1547,6 +1548,145 @@ static int check_system_inodes(struct gfs2_sbd *sdp)
 	return 0;
 }
 
+#define RA_WINDOW 64
+
+static unsigned pass1_readahead(struct gfs2_sbd *sdp, uint64_t *ibuf, unsigned n, unsigned cur_window)
+{
+	unsigned i;
+	unsigned thresh = (n < RA_WINDOW) ? n : RA_WINDOW;
+
+	for (i = cur_window; i < thresh; i++)
+		posix_fadvise(sdp->device_fd, ibuf[i]*sdp->bsize, sdp->bsize, POSIX_FADV_WILLNEED);
+
+	return i;
+}
+
+static int pass1_process_bitmap(struct gfs2_sbd *sdp, struct rgrp_tree *rgd, uint64_t *ibuf, unsigned n)
+{
+	struct gfs2_buffer_head *bh;
+	unsigned i;
+	unsigned ra_window = 0;
+	uint64_t block;
+
+	for (i = 0; i < n; i++) {
+		block = ibuf[i];
+
+		if (ra_window < RA_WINDOW/2)
+			ra_window = pass1_readahead(sdp, ibuf + i, n - i, ra_window);
+
+		/* skip gfs1 rindex indirect blocks */
+		if (sdp->gfs1 && blockfind(&gfs1_rindex_blks, block)) {
+			log_debug(_("Skipping rindex indir block "
+				    "%lld (0x%llx)\n"),
+				  (unsigned long long)block,
+				  (unsigned long long)block);
+			continue;
+		}
+		warm_fuzzy_stuff(block);
+
+		if (fsck_abort) { /* if asked to abort */
+			gfs2_special_free(&gfs1_rindex_blks);
+			return FSCK_OK;
+		}
+		if (skip_this_pass) {
+			printf( _("Skipping pass 1 is not a good idea.\n"));
+			skip_this_pass = FALSE;
+			fflush(stdout);
+		}
+		if (fsck_system_inode(sdp, block)) {
+			log_debug(_("Already processed system inode "
+				    "%lld (0x%llx)\n"),
+				  (unsigned long long)block,
+				  (unsigned long long)block);
+			continue;
+		}
+		bh = bread(sdp, block);
+
+		if (gfs2_check_meta(bh, GFS2_METATYPE_DI)) {
+			/* In gfs2, a bitmap mark of 2 means an inode,
+			   but in gfs1 it means any metadata.  So if
+			   this is gfs1 and not an inode, it may be
+			   okay.  If it's non-dinode metadata, it will
+			   be referenced by an inode, so we need to
+			   skip it here and it will be sorted out
+			   when the referencing inode is checked. */
+			if (sdp->gfs1) {
+				uint32_t check_magic;
+
+				check_magic = ((struct gfs2_meta_header *)
+					       (bh->b_data))->mh_magic;
+				if (be32_to_cpu(check_magic) == GFS2_MAGIC) {
+					log_debug( _("Deferring GFS1 "
+						     "metadata block #"
+						     "%" PRIu64" (0x%"
+						     PRIx64 ")\n"),
+						   block, block);
+					brelse(bh);
+					continue;
+				}
+			}
+			log_err( _("Found invalid inode at block #"
+				   "%llu (0x%llx)\n"),
+				 (unsigned long long)block,
+				 (unsigned long long)block);
+			if (gfs2_blockmap_set(bl, block, gfs2_block_free)) {
+				stack;
+				brelse(bh);
+				gfs2_special_free(&gfs1_rindex_blks);
+				return FSCK_ERROR;
+			}
+			check_n_fix_bitmap(sdp, block, gfs2_block_free);
+		} else if (handle_di(sdp, bh) < 0) {
+			stack;
+			brelse(bh);
+			gfs2_special_free(&gfs1_rindex_blks);
+			return FSCK_ERROR;
+		}
+		/* Ignore everything else - they should be hit by the
+		   handle_di step.  Don't check NONE either, because
+		   check_meta passes everything if GFS2_METATYPE_NONE
+		   is specified.  Hopefully, other metadata types such
+		   as indirect blocks will be handled when the inode
+		   itself is processed, and if it's not, it should be
+		   caught in pass5. */
+		brelse(bh);
+	}
+
+	return 0;
+}
+
+static int pass1_process_rgrp(struct gfs2_sbd *sdp, struct rgrp_tree *rgd)
+{
+	unsigned k, n, i;
+	uint64_t *ibuf = malloc(sdp->bsize * GFS2_NBBY * sizeof(uint64_t));
+	int ret;
+
+	for (k = 0; k < rgd->ri.ri_length; k++) {
+		n = lgfs2_bm_scan(rgd, k, ibuf, GFS2_BLKST_DINODE);
+
+		if (n) {
+			ret = pass1_process_bitmap(sdp, rgd, ibuf, n);
+			if (ret)
+				return ret;
+		}
+
+		/*
+		  For GFS1, we have to count the "free meta" blocks in the
+		  resource group and mark them specially so we can count them
+		  properly in pass5.
+		 */
+		if (!sdp->gfs1)
+			continue;
+
+		n = lgfs2_bm_scan(rgd, k, ibuf, GFS2_BLKST_UNLINKED);
+		for (i = 0; i < n; i++)
+			gfs2_blockmap_set(bl, ibuf[i], gfs2_freemeta);
+	}
+
+	free(ibuf);
+	return 0;
+}
+
 /**
  * pass1 - walk through inodes and check inode state
  *
@@ -1563,12 +1703,10 @@ static int check_system_inodes(struct gfs2_sbd *sdp)
 int pass1(struct gfs2_sbd *sdp)
 {
 	struct osi_node *n, *next = NULL;
-	struct gfs2_buffer_head *bh;
-	uint64_t block = 0;
 	struct rgrp_tree *rgd;
-	int first;
 	uint64_t i;
 	uint64_t rg_count = 0;
+	int ret;
 
 	osi_list_init(&gfs1_rindex_blks.list);
 
@@ -1611,115 +1749,10 @@ int pass1(struct gfs2_sbd *sdp)
 			gfs2_meta_rgrp);*/
 		}
 
-		first = 1;
+		ret = pass1_process_rgrp(sdp, rgd);
+		if (ret)
+			return ret;
 
-		while (1) {
-			/* "block" is relative to the entire file system */
-			/* Get the next dinode in the file system, according
-			   to the bitmap.  This should ONLY be dinodes unless
-			   it's GFS1, in which case it can be any metadata. */
-			if (gfs2_next_rg_meta(rgd, &block, first))
-				break;
-			/* skip gfs1 rindex indirect blocks */
-			if (sdp->gfs1 && blockfind(&gfs1_rindex_blks, block)) {
-				log_debug(_("Skipping rindex indir block "
-					    "%lld (0x%llx)\n"),
-					  (unsigned long long)block,
-					  (unsigned long long)block);
-				first = 0;
-				continue;
-			}
-			warm_fuzzy_stuff(block);
-
-			if (fsck_abort) { /* if asked to abort */
-				gfs2_special_free(&gfs1_rindex_blks);
-				return FSCK_OK;
-			}
-			if (skip_this_pass) {
-				printf( _("Skipping pass 1 is not a good idea.\n"));
-				skip_this_pass = FALSE;
-				fflush(stdout);
-			}
-			if (fsck_system_inode(sdp, block)) {
-				log_debug(_("Already processed system inode "
-					    "%lld (0x%llx)\n"),
-					  (unsigned long long)block,
-					  (unsigned long long)block);
-				first = 0;
-				continue;
-			}
-			bh = bread(sdp, block);
-
-			/*log_debug( _("Checking metadata block #%" PRIu64
-			  " (0x%" PRIx64 ")\n"), block, block);*/
-
-			if (gfs2_check_meta(bh, GFS2_METATYPE_DI)) {
-				/* In gfs2, a bitmap mark of 2 means an inode,
-				   but in gfs1 it means any metadata.  So if
-				   this is gfs1 and not an inode, it may be
-				   okay.  If it's non-dinode metadata, it will
-				   be referenced by an inode, so we need to
-				   skip it here and it will be sorted out
-				   when the referencing inode is checked. */
-				if (sdp->gfs1) {
-					uint32_t check_magic;
-
-					check_magic = ((struct
-							gfs2_meta_header *)
-						       (bh->b_data))->mh_magic;
-					if (be32_to_cpu(check_magic) ==
-					    GFS2_MAGIC) {
-						log_debug( _("Deferring GFS1 "
-							     "metadata block #"
-							     "%" PRIu64" (0x%"
-							     PRIx64 ")\n"),
-							   block, block);
-						brelse(bh);
-						first = 0;
-						continue;
-					}
-				}
-				log_err( _("Found invalid inode at block #"
-					   "%llu (0x%llx)\n"),
-					 (unsigned long long)block,
-					 (unsigned long long)block);
-				if (gfs2_blockmap_set(bl, block,
-						      gfs2_block_free)) {
-					stack;
-					brelse(bh);
-					gfs2_special_free(&gfs1_rindex_blks);
-					return FSCK_ERROR;
-				}
-				check_n_fix_bitmap(sdp, block,
-						   gfs2_block_free);
-			} else if (handle_di(sdp, bh) < 0) {
-				stack;
-				brelse(bh);
-				gfs2_special_free(&gfs1_rindex_blks);
-				return FSCK_ERROR;
-			}
-			/* Ignore everything else - they should be hit by the
-			   handle_di step.  Don't check NONE either, because
-			   check_meta passes everything if GFS2_METATYPE_NONE
-			   is specified.  Hopefully, other metadata types such
-			   as indirect blocks will be handled when the inode
-			   itself is processed, and if it's not, it should be
-			   caught in pass5. */
-			brelse(bh);
-			first = 0;
-		}
-		/*
-		  For GFS1, we have to count the "free meta" blocks in the
-		  resource group and mark them specially so we can count them
-		  properly in pass5.
-		 */
-		if (!sdp->gfs1)
-			continue;
-		first = 1;
-		while (gfs2_next_rg_freemeta(rgd, &block, first) == 0) {
-			gfs2_blockmap_set(bl, block, gfs2_freemeta);
-			first = 0;
-		}
 	}
 	gfs2_special_free(&gfs1_rindex_blks);
 	return FSCK_OK;
diff --git a/gfs2/libgfs2/libgfs2.h b/gfs2/libgfs2/libgfs2.h
index 46d4d67..db31a6c 100644
--- a/gfs2/libgfs2/libgfs2.h
+++ b/gfs2/libgfs2/libgfs2.h
@@ -757,12 +757,12 @@ extern int build_root(struct gfs2_sbd *sdp);
 extern int do_init_inum(struct gfs2_sbd *sdp);
 extern int do_init_statfs(struct gfs2_sbd *sdp);
 extern int gfs2_check_meta(struct gfs2_buffer_head *bh, int type);
+extern unsigned lgfs2_bm_scan(struct rgrp_tree *rgd, unsigned idx,
+			      uint64_t *buf, uint8_t state);
 extern int gfs2_next_rg_meta(struct rgrp_tree *rgd, uint64_t *block,
 			     int first);
 extern int gfs2_next_rg_metatype(struct gfs2_sbd *sdp, struct rgrp_tree *rgd,
 				 uint64_t *block, uint32_t type, int first);
-extern int gfs2_next_rg_freemeta(struct rgrp_tree *rgd, uint64_t *block,
-				 int first);
 
 /* super.c */
 extern int check_sb(struct gfs2_sb *sb);
diff --git a/gfs2/libgfs2/structures.c b/gfs2/libgfs2/structures.c
index 645c45a..6981072 100644
--- a/gfs2/libgfs2/structures.c
+++ b/gfs2/libgfs2/structures.c
@@ -495,6 +495,24 @@ int gfs2_check_meta(struct gfs2_buffer_head *bh, int type)
 	return 0;
 }
 
+unsigned lgfs2_bm_scan(struct rgrp_tree *rgd, unsigned idx, uint64_t *buf, uint8_t state)
+{
+	struct gfs2_bitmap *bi = &rgd->bits[idx];
+	unsigned n = 0;
+	uint32_t blk = 0;
+
+	while(blk < (bi->bi_len * GFS2_NBBY)) {
+		blk = gfs2_bitfit(rgd->bh[idx]->b_data + bi->bi_offset,
+				  bi->bi_len, blk, state);
+		if (blk == BFITNOENT)
+			break;
+		buf[n++] = blk + (bi->bi_start * GFS2_NBBY) + rgd->ri.ri_data0;
+		blk++;
+	}
+
+	return n;
+}
+
 /**
  * gfs2_next_rg_meta
  * @rgd:
@@ -545,11 +563,6 @@ int gfs2_next_rg_meta(struct rgrp_tree *rgd, uint64_t *block, int first)
 	return __gfs2_next_rg_meta(rgd, block, first, GFS2_BLKST_DINODE);
 }
 
-int gfs2_next_rg_freemeta(struct rgrp_tree *rgd, uint64_t *block, int first)
-{
-	return __gfs2_next_rg_meta(rgd, block, first, GFS2_BLKST_UNLINKED);
-}
-
 /**
  * next_rg_metatype
  * @rgd:



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]