[Cluster-devel] cluster/gfs/gfs_fsck fsck.h initialize.c log.c ...

rpeterso at sourceware.org rpeterso at sourceware.org
Fri Nov 17 16:32:52 UTC 2006


CVSROOT:	/cvs/cluster
Module name:	cluster
Branch: 	STABLE
Changes by:	rpeterso at sourceware.org	2006-11-17 16:32:49

Modified files:
	gfs/gfs_fsck   : fsck.h initialize.c log.c main.c pass1.c 
	                 pass1b.c pass1c.c pass2.c pass3.c pass4.c 
	                 pass5.c util.c 

Log message:
	Resolves: bz208836 - fatal: invalid metadata block
	1. Fix a memory leak in pass1b.
	2. Improve performance of pass1b by combining loops through fs.
	3. Give an error message and abort if the file system is larger than
	16TB and the node architecture is 32-bit.
	4. Offer users "Abort", "Continue" and "Skip" choices if they interrupt
	with ctrl-c.  Also, report progress for that pass on interrupt.
	5. Added more "percent complete" messages for other passes.
	
	See bz comment #33 for more details.

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/fsck.h.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.2.2.1.4.1.2.1&r2=1.2.2.1.4.1.2.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/initialize.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.1.2.4.4.2.2.4&r2=1.1.2.4.4.2.2.5
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/log.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.2.2.2.4.1.2.1&r2=1.2.2.2.4.1.2.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/main.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.1.2.2.6.1&r2=1.1.2.2.6.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/pass1.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.1.2.7.6.2&r2=1.1.2.7.6.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/pass1b.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.3.2.1.6.1&r2=1.3.2.1.6.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/pass1c.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.2.2.1.4.1.2.1&r2=1.2.2.1.4.1.2.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/pass2.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.3.2.4.6.1&r2=1.3.2.4.6.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/pass3.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.1.2.3.6.1&r2=1.1.2.3.6.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/pass4.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.1.2.5.6.1&r2=1.1.2.5.6.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/pass5.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.1.2.4.6.3&r2=1.1.2.4.6.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/util.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.1.2.3.6.1&r2=1.1.2.3.6.2

--- cluster/gfs/gfs_fsck/fsck.h	2006/06/21 14:21:21	1.2.2.1.4.1.2.1
+++ cluster/gfs/gfs_fsck/fsck.h	2006/11/17 16:32:49	1.2.2.1.4.1.2.2
@@ -26,7 +26,8 @@
 	int no:1;
 };
 
-extern uint64_t last_fs_block;
+extern uint64_t last_fs_block, last_reported_block;
+extern int skip_this_pass, fsck_abort, fsck_query;
 
 int initialize(struct fsck_sb *sbp);
 void destroy(struct fsck_sb *sbp);
--- cluster/gfs/gfs_fsck/initialize.c	2006/09/20 16:37:56	1.1.2.4.4.2.2.4
+++ cluster/gfs/gfs_fsck/initialize.c	2006/11/17 16:32:49	1.1.2.4.4.2.2.5
@@ -210,6 +210,12 @@
 	}
 
 	sdp->last_fs_block = (jmax > rmax) ? jmax : rmax;
+	if (sdp->last_fs_block > 0xffffffff && sizeof(unsigned long) <= 4) {
+		log_crit("This file system is too big for this computer to handle.\n");
+		log_crit("Last fs block = 0x%llx, but sizeof(unsigned long) is %d bytes.\n",
+				 sdp->last_fs_block, sizeof(unsigned long));
+		goto fail;
+	}
 
 	sdp->last_data_block = rmax;
 	sdp->first_data_block = rmin;
--- cluster/gfs/gfs_fsck/log.c	2006/06/21 14:21:21	1.2.2.2.4.1.2.1
+++ cluster/gfs/gfs_fsck/log.c	2006/11/17 16:32:49	1.2.2.2.4.1.2.2
@@ -102,6 +102,7 @@
 	if(sbp->opts->no)
 		return 0;
 
+	fsck_query = TRUE;
 	/* Watch stdin (fd 0) to see when it has input. */
 	FD_ZERO(&rfds);
 	FD_SET(STDIN_FILENO, &rfds);
@@ -149,5 +150,6 @@
 		read(STDIN_FILENO, &response, sizeof(char));
 	}
 
+	fsck_query = FALSE;
 	return ret;
 }
--- cluster/gfs/gfs_fsck/main.c	2006/06/21 14:21:21	1.1.2.2.6.1
+++ cluster/gfs/gfs_fsck/main.c	2006/11/17 16:32:49	1.1.2.2.6.2
@@ -14,13 +14,17 @@
 #include <stdio.h>
 #include <stdint.h>
 #include <libgen.h>
+#include <ctype.h>
+#include <signal.h>
 
 #include "copyright.cf"
 #include "fsck_incore.h"
 #include "fsck.h"
 #include "log.h"
 
-uint64_t last_fs_block;
+uint64_t last_fs_block, last_reported_block = -1;
+int skip_this_pass = FALSE, fsck_abort = FALSE, fsck_query = FALSE;
+const char *pass = "";
 
 void print_map(struct block_list *il, int count)
 {
@@ -110,6 +114,61 @@
 	return 0;
 }
 
+void interrupt(int sig)
+{
+	fd_set rfds;
+	struct timeval tv;
+	char response;
+	int err;
+
+	if (fsck_query) /* if we're asking them a question */
+		return;     /* ignore the interrupt signal */
+	FD_ZERO(&rfds);
+	FD_SET(STDIN_FILENO, &rfds);
+
+	tv.tv_sec = 0;
+	tv.tv_usec = 0;
+	/* Make sure there isn't extraneous input before asking the
+	 * user the question */
+	while((err = select(STDIN_FILENO + 1, &rfds, NULL, NULL, &tv))) {
+		if(err < 0) {
+			log_debug("Error in select() on stdin\n");
+			break;
+		}
+		read(STDIN_FILENO, &response, sizeof(char));
+	}
+	while (TRUE) {
+		printf("\ngfs_fsck interrupted in %s:  ", pass);
+		if (!last_reported_block || last_reported_block == last_fs_block)
+			printf("progress unknown.\n");
+		else
+			printf("processing block %" PRIu64 " out of %" PRIu64 "\n",
+				   last_reported_block, last_fs_block);
+		printf("Do you want to abort gfs_fsck, skip the rest of %s or continue (a/s/c)?", pass);
+
+		/* Make sure query is printed out */
+		fflush(stdout);
+		read(STDIN_FILENO, &response, sizeof(char));
+
+		if(tolower(response) == 's') {
+			skip_this_pass = TRUE;
+			return;
+		}
+		else if (tolower(response) == 'a') {
+			fsck_abort = TRUE;
+			return;
+		}
+		else if (tolower(response) == 'c')
+			return;
+        else {
+			while(response != '\n')
+				read(STDIN_FILENO, &response, sizeof(char));
+			printf("Bad response, please type 'c', 'a' or 's'.\n");
+			continue;
+        }
+	}
+}
+
 int main(int argc, char **argv)
 {
 	struct fsck_sb sb;
@@ -127,44 +186,100 @@
 	if (initialize(sbp))
 		return 1;
 
+	signal(SIGINT, interrupt);
 	log_notice("Starting pass1\n");
+	pass = "pass 1";
+	last_reported_block = 0;
 	if (pass1(sbp))
 		return 1;
-	log_notice("Pass1 complete      \n");
-
-	log_notice("Starting pass1b\n");
-	if(pass1b(sbp))
-		return 1;
-	log_notice("Pass1b complete      \n");
-
-	log_notice("Starting pass1c\n");
-	if(pass1c(sbp))
-		return 1;
-	log_notice("Pass1c complete      \n");
-
-	log_notice("Starting pass2\n");
-	if (pass2(sbp, &opts))
-		return 1;
-	log_notice("Pass2 complete      \n");
-
-	log_notice("Starting pass3\n");
-	if (pass3(sbp, &opts))
-		return 1;
-	log_notice("Pass3 complete      \n");
-
-	log_notice("Starting pass4\n");
-	if (pass4(sbp, &opts))
-		return 1;
-	log_notice("Pass4 complete      \n");
-
-	log_notice("Starting pass5\n");
-	if (pass5(sbp, &opts))
-		return 1;
-	log_notice("Pass5 complete      \n");
+	if (skip_this_pass || fsck_abort) {
+		skip_this_pass = FALSE;
+		log_notice("Pass1 interrupted   \n");
+	}
+	else
+		log_notice("Pass1 complete      \n");
 
-/*	print_map(sbp->bl, sbp->last_fs_block); */
+	if (!fsck_abort) {
+		last_reported_block = 0;
+		pass = "pass 1b";
+		log_notice("Starting pass1b\n");
+		if(pass1b(sbp))
+			return 1;
+		if (skip_this_pass || fsck_abort) {
+			skip_this_pass = FALSE;
+			log_notice("Pass1b interrupted   \n");
+		}
+		else
+			log_notice("Pass1b complete      \n");
+	}
+	if (!fsck_abort) {
+		last_reported_block = 0;
+		pass = "pass 1c";
+		log_notice("Starting pass1c\n");
+		if(pass1c(sbp))
+			return 1;
+		if (skip_this_pass || fsck_abort) {
+			skip_this_pass = FALSE;
+			log_notice("Pass1c interrupted   \n");
+		}
+		else
+			log_notice("Pass1c complete      \n");
+	}
+	if (!fsck_abort) {
+		last_reported_block = 0;
+		pass = "pass 2";
+		log_notice("Starting pass2\n");
+		if (pass2(sbp, &opts))
+			return 1;
+		if (skip_this_pass || fsck_abort) {
+			skip_this_pass = FALSE;
+			log_notice("Pass2 interrupted   \n");
+		}
+		else
+			log_notice("Pass2 complete      \n");
+	}
+	if (!fsck_abort) {
+		last_reported_block = 0;
+		pass = "pass 3";
+		log_notice("Starting pass3\n");
+		if (pass3(sbp, &opts))
+			return 1;
+		if (skip_this_pass || fsck_abort) {
+			skip_this_pass = FALSE;
+			log_notice("Pass3 interrupted   \n");
+		}
+		else
+			log_notice("Pass3 complete      \n");
+	}
+	if (!fsck_abort) {
+		last_reported_block = 0;
+		pass = "pass 4";
+		log_notice("Starting pass4\n");
+		if (pass4(sbp, &opts))
+			return 1;
+		if (skip_this_pass || fsck_abort) {
+			skip_this_pass = FALSE;
+			log_notice("Pass4 interrupted   \n");
+		}
+		else
+			log_notice("Pass4 complete      \n");
+	}
+	if (!fsck_abort) {
+		last_reported_block = 0;
+		pass = "pass 5";
+		log_notice("Starting pass5\n");
+		if (pass5(sbp, &opts))
+			return 1;
+		if (skip_this_pass || fsck_abort) {
+			skip_this_pass = FALSE;
+			log_notice("Pass5 interrupted   \n");
+		}
+		else
+			log_notice("Pass5 complete      \n");
+		/*	print_map(sbp->bl, sbp->last_fs_block); */
 
-	log_notice("Writing changes to disk\n");
+		log_notice("Writing changes to disk\n");
+	}
 	destroy(sbp);
 
 	return 0;
--- cluster/gfs/gfs_fsck/pass1.c	2006/06/21 14:21:21	1.1.2.7.6.2
+++ cluster/gfs/gfs_fsck/pass1.c	2006/11/17 16:32:49	1.1.2.7.6.3
@@ -905,6 +905,13 @@
 				break;
 
 			warm_fuzzy_stuff(block);
+			if (fsck_abort) /* if asked to abort */
+				return 0;
+			if (skip_this_pass) {
+				printf("Skipping pass 1 is not a good idea.\n");
+				skip_this_pass = FALSE;
+				fflush(stdout);
+			}
 			if(get_and_read_buf(sbp, block, &bh, 0)){
 				stack;
 				log_crit("Unable to retrieve block %"PRIu64
--- cluster/gfs/gfs_fsck/pass1b.c	2005/07/11 18:24:40	1.3.2.1.6.1
+++ cluster/gfs/gfs_fsck/pass1b.c	2006/11/17 16:32:49	1.3.2.1.6.2
@@ -470,6 +470,7 @@
 	osi_list_t *tmp;
 	struct metawalk_fxns find_dirents = {0};
 	find_dirents.check_dentry = &find_dentry;
+	int rc = 0;
 
 	osi_list_init(&sbp->dup_list);
 	/* Shove all blocks marked as duplicated into a list */
@@ -487,10 +488,14 @@
 	log_info("Scanning filesystem for inodes containing duplicate blocks...\n");
 	log_debug("Filesystem has %"PRIu64" blocks total\n", sbp->last_fs_block);
 	for(i = 0; i < sbp->last_fs_block; i += 1) {
+		warm_fuzzy_stuff(i);
+		if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
+			goto out;
 		log_debug("Scanning block %"PRIu64" for inodes\n", i);
 		if(block_check(sbp->bl, i, &q)) {
 			stack;
-			return -1;
+			rc = -1;
+			goto out;
 		}
 		if((q.block_type == inode_dir) ||
 		   (q.block_type == inode_file) ||
@@ -503,33 +508,27 @@
 				b = osi_list_entry(tmp, struct blocks, list);
 				if(find_block_ref(sbp, i, b)) {
 					stack;
-					return -1;
+					rc = -1;
+					goto out;
 				}
 			}
 		}
-	}
-
-	/* Rescan the fs looking for directory entries to the inodes
-	 * with duplicate blocks - might need this to deal with the
-	 * inode correctly */
-	log_info("Looking through directory entries for inodes with duplicate blocks...\n");
-	for(i = 0; i < sbp->last_fs_block; i++) {
-		if(block_check(sbp->bl, i, &q)) {
-			stack;
-			return 0;
-		}
 		if(q.block_type == inode_dir) {
 			check_dir(sbp, i, &find_dirents);
 		}
 	}
 
-
 	/* Fix dups here - it's going to slow things down a lot to fix
 	 * it later */
 	log_info("Handling duplicate blocks\n");
-	osi_list_foreach(tmp, &sbp->dup_list) {
-		b = osi_list_entry(tmp, struct blocks, list);
-		handle_dup_blk(sbp, b);
+out:
+	/*osi_list_foreach(tmp, &sbp->dup_list) {*/
+	while (!osi_list_empty(&sbp->dup_list)) {
+		b = osi_list_entry(sbp->dup_list.next, struct blocks, list);
+		if (!skip_this_pass && !rc) /* no error & not asked to skip the rest */
+			handle_dup_blk(sbp, b);
+		osi_list_del(&b->list);
+		free(b);
 	}
-	return 0;
+	return rc;
 }
--- cluster/gfs/gfs_fsck/pass1c.c	2006/02/02 01:16:10	1.2.2.1.4.1.2.1
+++ cluster/gfs/gfs_fsck/pass1c.c	2006/11/17 16:32:49	1.2.2.1.4.1.2.2
@@ -252,6 +252,8 @@
 	log_info("Looking for inodes containing ea blocks...\n");
 	while (!find_next_block_type(sbp->bl, eattr_block, &block_no)) {
 
+		if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
+			return 0;
 		log_info("EA in inode %"PRIu64"\n", block_no);
 		if(get_and_read_buf(sbp, block_no, &bh, 0)) {
 			stack;
--- cluster/gfs/gfs_fsck/pass2.c	2006/06/21 14:21:21	1.3.2.4.6.1
+++ cluster/gfs/gfs_fsck/pass2.c	2006/11/17 16:32:49	1.3.2.4.6.2
@@ -812,6 +812,10 @@
 	/* Grab each directory inode, and run checks on it */
 	for(i = 0; i < sbp->last_fs_block; i++) {
 
+		warm_fuzzy_stuff(i);
+		if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
+			return 0;
+
 		/* Skip the root inode - it's checked above */
 		if(i == sbp->sb.sb_root_di.no_addr)
 			continue;
--- cluster/gfs/gfs_fsck/pass3.c	2006/06/21 14:21:21	1.1.2.3.6.1
+++ cluster/gfs/gfs_fsck/pass3.c	2006/11/17 16:32:49	1.1.2.3.6.2
@@ -213,6 +213,8 @@
 			/* FIXME: Change this so it returns success or
 			 * failure and put the parent inode in a
 			 * param */
+			if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
+				return 0;
 			tdi = mark_and_return_parent(sbp, di);
 
 			/* FIXME: Factor this ? */
--- cluster/gfs/gfs_fsck/pass4.c	2006/06/21 14:21:21	1.1.2.5.6.1
+++ cluster/gfs/gfs_fsck/pass4.c	2006/11/17 16:32:49	1.1.2.5.6.2
@@ -49,6 +49,8 @@
 	/* FIXME: should probably factor this out into a generic
 	 * scanning fxn */
 	osi_list_foreach(tmp, list) {
+		if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
+			return 0;
 		if(!(ii = osi_list_entry(tmp, struct inode_info, list))) {
 			log_crit("osi_list_foreach broken in scan_info_list!!\n");
 			exit(1);
@@ -176,6 +178,8 @@
 			  sbp->lf_dip->i_di.di_entries);
 	log_info("Checking inode reference counts.\n");
 	for (i = 0; i < FSCK_HASH_SIZE; i++) {
+		if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
+			return 0;
 		list = &sbp->inode_hash[i];
 		if(scan_inode_list(sbp, list)) {
 			stack;
--- cluster/gfs/gfs_fsck/pass5.c	2006/06/21 14:21:21	1.1.2.4.6.3
+++ cluster/gfs/gfs_fsck/pass5.c	2006/11/17 16:32:49	1.1.2.4.6.4
@@ -194,6 +194,8 @@
 		block = rg_data + *rg_block;
 		log_debug("Checking block %" PRIu64 "\n", block);
 		warm_fuzzy_stuff(block);
+		if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
+			return 0;
 		block_check(sbp->bl, block, &q);
 
 		block_status = convert_mark(q.block_type, count);
@@ -266,6 +268,8 @@
 				   BH_DATA(rgp->rd_bh[i]) + bits->bi_offset,
 				   bits->bi_len, &rg_block,
 				   rgp->rd_ri.ri_data1, count);
+		if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
+			return 0;
 	}
 
 	/* Compare the rgrps counters with what we found */
@@ -351,6 +355,8 @@
 
 	/* Reconcile RG bitmaps with fsck bitmap */
 	for(tmp = sbp->rglist.next; tmp != &sbp->rglist; tmp = tmp->next){
+		if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
+			return 0;
 		log_info("Updating Resource Group %"PRIu64"\n", rg_count);
 		memset(count, 0, sizeof(*count) * 5);
 		rgp = osi_list_entry(tmp, struct fsck_rgrp, rd_list);
--- cluster/gfs/gfs_fsck/util.c	2006/06/21 14:21:24	1.1.2.3.6.1
+++ cluster/gfs/gfs_fsck/util.c	2006/11/17 16:32:49	1.1.2.3.6.2
@@ -321,6 +321,7 @@
 	static struct timeval tv;
 	static uint32_t seconds = 0;
 	
+	last_reported_block = block;
 	gettimeofday(&tv, NULL);
 	if (!seconds)
 		seconds = tv.tv_sec;




More information about the Cluster-devel mailing list