[Cluster-devel] cluster/gfs/gfs_fsck fs_recovery.c fsck.h init ...

rpeterso at sourceware.org rpeterso at sourceware.org
Tue Jun 20 14:43:01 UTC 2006


CVSROOT:	/cvs/cluster
Module name:	cluster
Branch: 	RHEL4
Changes by:	rpeterso at sourceware.org	2006-06-20 14:42:56

Modified files:
	gfs/gfs_fsck   : fs_recovery.c fsck.h initialize.c log.c main.c 
	                 pass1.c pass2.c pass3.c pass4.c pass5.c super.c 
	                 util.c util.h 

Log message:
	This addresses bugzilla bug #156009 - gfs fsck needs a good
	review of logging.  I improved logging and added some "warm
	fuzzy" feel-good messages to let the users know it's not hung.

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/fs_recovery.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.2&r2=1.1.2.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/fsck.h.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.2.2.2&r2=1.2.2.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/initialize.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.7&r2=1.1.2.8
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/log.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.2.2.3&r2=1.2.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/main.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.2&r2=1.1.2.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/pass1.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.8&r2=1.1.2.9
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/pass2.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.3.2.4&r2=1.3.2.5
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/pass3.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/pass4.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.5&r2=1.1.2.6
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/pass5.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.6&r2=1.1.2.7
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/super.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.2.2.6&r2=1.2.2.7
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/util.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/util.h.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.2&r2=1.1.2.3

--- cluster/gfs/gfs_fsck/fs_recovery.c	2005/06/15 16:10:19	1.1.2.2
+++ cluster/gfs/gfs_fsck/fs_recovery.c	2006/06/20 14:42:56	1.1.2.3
@@ -75,15 +75,15 @@
  * Returns: 0 on success, -1 on failure
  */
 int reconstruct_journals(struct fsck_sb *sdp){
-  int i;
+	int i;
 
-  log_warn("Clearing journals (this may take a while)\n");
-  for(i=0; i < sdp->journals; i++) {
-    if((i % 10) == 0)
-      log_at_notice(".");
-    if(reconstruct_single_journal(sdp, i))
-      return -1;
-  }
-  log_notice("Cleared journals\n");
-  return 0;
+	log_notice("Clearing journals (this may take a while)");
+	for(i=0; i < sdp->journals; i++) {
+		if((i % 2) == 0)
+			log_at_notice(".");
+		if(reconstruct_single_journal(sdp, i))
+			return -1;
+	}
+	log_notice("\nJournals cleared.\n");
+	return 0;
 }
--- cluster/gfs/gfs_fsck/fsck.h	2005/06/17 16:18:33	1.2.2.2
+++ cluster/gfs/gfs_fsck/fsck.h	2006/06/20 14:42:56	1.2.2.3
@@ -26,7 +26,7 @@
 	int no:1;
 };
 
-
+extern uint64_t last_fs_block;
 
 int initialize(struct fsck_sb *sbp);
 void destroy(struct fsck_sb *sbp);
--- cluster/gfs/gfs_fsck/initialize.c	2005/12/15 21:36:42	1.1.2.7
+++ cluster/gfs/gfs_fsck/initialize.c	2006/06/20 14:42:56	1.1.2.8
@@ -98,6 +98,7 @@
 {
 	uint32_t i;
 
+	log_info("Freeing buffers.\n");
 	if(sdp->riinode){
 		free(sdp->riinode);
 		sdp->riinode = NULL;
@@ -108,7 +109,7 @@
 	}
 	if(sdp->rooti){
 		free(sdp->rooti);
-		sdp->rooti=NULL;
+		sdp->rooti = NULL;
 	}
 
 	if(sdp->jindex){
@@ -196,6 +197,7 @@
 			rmin = ri->ri_data1;
 	}
 
+	last_fs_block = rmax;
 
 	for (i = 0; i < sdp->journals; i++)
 	{
@@ -406,6 +408,7 @@
 			log_warn("Unable to unblock other mounters - manual intevention required\n");
 			log_warn("Use 'gfs_tool sb <device> proto' to fix\n");
 		}
+		log_info("Syncing the device.\n");
 		fsync(sbp->diskfd);
 	}
 	empty_super_block(sbp);
--- cluster/gfs/gfs_fsck/log.c	2005/06/15 16:10:19	1.2.2.3
+++ cluster/gfs/gfs_fsck/log.c	2006/06/20 14:42:56	1.2.2.4
@@ -45,11 +45,13 @@
 	case MSG_DEBUG:
 		printf("(%s:%d)\t", file, line);
 		vprintf(format, args);
+		fflush(stdout);
 		break;
 	case MSG_INFO:
 	case MSG_NOTICE:
 	case MSG_WARN:
 		vprintf(format, args);
+		fflush(stdout);
 		break;
 	case MSG_ERROR:
 	case MSG_CRITICAL:
--- cluster/gfs/gfs_fsck/main.c	2005/02/22 22:01:06	1.1.2.2
+++ cluster/gfs/gfs_fsck/main.c	2006/06/20 14:42:56	1.1.2.3
@@ -20,6 +20,8 @@
 #include "fsck.h"
 #include "log.h"
 
+uint64_t last_fs_block;
+
 void print_map(struct block_list *il, int count)
 {
 	int i, j;
@@ -128,40 +130,41 @@
 	log_notice("Starting pass1\n");
 	if (pass1(sbp))
 		return 1;
-	log_notice("Pass1 complete\n");
+	log_notice("Pass1 complete      \n");
 
 	log_notice("Starting pass1b\n");
 	if(pass1b(sbp))
 		return 1;
-	log_notice("Pass1b complete\n");
+	log_notice("Pass1b complete      \n");
 
 	log_notice("Starting pass1c\n");
 	if(pass1c(sbp))
 		return 1;
-	log_notice("Pass1c complete\n");
+	log_notice("Pass1c complete      \n");
 
 	log_notice("Starting pass2\n");
 	if (pass2(sbp, &opts))
 		return 1;
-	log_notice("Pass2 complete\n");
+	log_notice("Pass2 complete      \n");
 
 	log_notice("Starting pass3\n");
 	if (pass3(sbp, &opts))
 		return 1;
-	log_notice("Pass3 complete\n");
+	log_notice("Pass3 complete      \n");
 
 	log_notice("Starting pass4\n");
 	if (pass4(sbp, &opts))
 		return 1;
-	log_notice("Pass4 complete\n");
+	log_notice("Pass4 complete      \n");
 
 	log_notice("Starting pass5\n");
 	if (pass5(sbp, &opts))
 		return 1;
-	log_notice("Pass5 complete\n");
+	log_notice("Pass5 complete      \n");
 
 /*	print_map(sbp->bl, sbp->last_fs_block); */
 
+	log_notice("Writing changes to disk\n");
 	destroy(sbp);
 
 	return 0;
--- cluster/gfs/gfs_fsck/pass1.c	2006/04/20 20:43:41	1.1.2.8
+++ cluster/gfs/gfs_fsck/pass1.c	2006/06/20 14:42:56	1.1.2.9
@@ -883,7 +883,8 @@
 			stack;
 			return -1;
 		}
-		log_debug("RG at %"PRIu64" is %u long\n", rgd->rd_ri.ri_addr, rgd->rd_ri.ri_length);
+		log_debug("RG at %"PRIu64" is %u long\n", rgd->rd_ri.ri_addr,
+				  rgd->rd_ri.ri_length);
 		for (i = 0; i < rgd->rd_ri.ri_length; i++) {
 			if(block_set(sbp->bl, rgd->rd_ri.ri_addr + i,
 				     meta_other)){
@@ -903,7 +904,7 @@
 			if(next_rg_meta_free(rgd, &block, first, &mfree))
 				break;
 
-
+			warm_fuzzy_stuff(block);
 			if(get_and_read_buf(sbp, block, &bh, 0)){
 				stack;
 				log_crit("Unable to retrieve block %"PRIu64
--- cluster/gfs/gfs_fsck/pass2.c	2005/03/07 22:06:01	1.3.2.4
+++ cluster/gfs/gfs_fsck/pass2.c	2006/06/20 14:42:56	1.3.2.5
@@ -808,6 +808,7 @@
 	}
 	int error = 0;
 
+	log_info("Checking directory inodes.\n");
 	/* Grab each directory inode, and run checks on it */
 	for(i = 0; i < sbp->last_fs_block; i++) {
 
@@ -824,7 +825,7 @@
 		if(q.block_type != inode_dir)
 			continue;
 
-		log_info("Checking directory inode at block %"PRIu64"\n", i);
+		log_debug("Checking directory inode at block %"PRIu64"\n", i);
 
 
 		memset(&ds, 0, sizeof(ds));
--- cluster/gfs/gfs_fsck/pass3.c	2005/03/03 19:57:03	1.1.2.3
+++ cluster/gfs/gfs_fsck/pass3.c	2006/06/20 14:42:56	1.1.2.4
@@ -205,6 +205,7 @@
 	 * until we find one that's been checked already.  If we don't
 	 * find a parent, put in lost+found.
 	 */
+	log_info("Checking directory linkage.\n");
 	for(i = 0; i < FSCK_HASH_SIZE; i++) {
 	osi_list_foreach(tmp, &sbp->dir_hash[i]) {
 		di = osi_list_entry(tmp, struct dir_info, list);
@@ -270,8 +271,8 @@
 				break;
 			}
 			else {
-				log_info("Directory at block %"PRIu64
-					 " connected\n", di->dinode);
+				log_debug("Directory at block %" PRIu64 " connected\n",
+						 di->dinode);
 			}
 			di = tdi;
 		}
--- cluster/gfs/gfs_fsck/pass4.c	2005/03/07 22:06:01	1.1.2.5
+++ cluster/gfs/gfs_fsck/pass4.c	2006/06/20 14:42:56	1.1.2.6
@@ -59,8 +59,8 @@
 		   (ii->inode == sbp->sb.sb_quota_di.no_addr) ||
 		   (ii->inode == sbp->sb.sb_license_di.no_addr))
 			continue;
-		log_info("Checking reference count on inode at block %"PRIu64
-			 "\n", ii->inode);
+		log_debug("Checking reference count on inode at block %"PRIu64
+				  "\n", ii->inode);
 		if(ii->counted_links == 0) {
 			log_err("Found unlinked inode at %"PRIu64"\n",
 				ii->inode);
@@ -174,6 +174,7 @@
 	if(sbp->lf_dip)
 		log_debug("At beginning of pass4, l+f entries is %u\n",
 			  sbp->lf_dip->i_di.di_entries);
+	log_info("Checking inode reference counts.\n");
 	for (i = 0; i < FSCK_HASH_SIZE; i++) {
 		list = &sbp->inode_hash[i];
 		if(scan_inode_list(sbp, list)) {
--- cluster/gfs/gfs_fsck/pass5.c	2006/04/20 20:43:41	1.1.2.6
+++ cluster/gfs/gfs_fsck/pass5.c	2006/06/20 14:42:56	1.1.2.7
@@ -17,6 +17,7 @@
 #include "ondisk.h"
 #include "fs_bits.h"
 #include "bio.h"
+#include "util.h"
 
 #ifdef DEBUG
 int rgrp_countbits(unsigned char *buffer, unsigned int buflen,
@@ -191,6 +192,8 @@
 	while(byte < end) {
 		rg_status = ((*byte >> bit) & GFS_BIT_MASK);
 		block = rg_data + *rg_block;
+		log_debug("Checking block %" PRIu64 "\n", block);
+		warm_fuzzy_stuff(block);
 		block_check(sbp->bl, block, &q);
 
 		block_status = convert_mark(q.block_type, count);
--- cluster/gfs/gfs_fsck/super.c	2006/04/20 20:43:41	1.2.2.6
+++ cluster/gfs/gfs_fsck/super.c	2006/06/20 14:42:56	1.2.2.7
@@ -1081,9 +1081,9 @@
 						  gfs_grow or something.  Count the RGs by hand. */
 	} trust_lvl;
 
-	log_warn("Validating Resource Group index.\n");
+	log_info("Validating Resource Group index.\n");
 	for (trust_lvl = blind_faith; trust_lvl <= distrust; trust_lvl++) {
-		log_warn("Level %d check.\n", trust_lvl + 1);
+		log_info("Level %d check.\n", trust_lvl + 1);
 		count1 = count2 = 0;
 		/* ---------------------------------------------------------------- */
 		/* Step 1 - Calculate or figure out our own RG index                */
@@ -1097,7 +1097,7 @@
 			error = gfs_rgindex_calculate(sdp, &expected_rglist,
 										  &calc_rg_count);
 			if (error) { /* If calculated RGs don't reasonably match the fs */
-				log_warn("(failed--trying again at level 3)\n");
+				log_info("(failed--trying again at level 3)\n");
 				ri_cleanup(&sdp->rglist);
 				continue; /* Try again, this time counting them manually */
 			}
@@ -1106,7 +1106,7 @@
 			error = gfs_rgindex_rebuild(sdp, &expected_rglist,
 										&calc_rg_count); /* count the RGs. */
 			if (error) { /* If calculated RGs don't reasonably match the fs */
-				log_err("(failed--giving up)\n");
+				log_info("(failed--giving up)\n");
 				goto fail; /* try again, this time counting them manually */
 			}
 		}
@@ -1177,7 +1177,7 @@
 			count1++;
 		} /* for all RGs in the index */
 		if (!error) {
-			log_warn("%u resource groups found.\n", rg);
+			log_info("%u resource groups found.\n", rg);
 			if (trust_lvl != blind_faith && rg != calc_rg_count)
 				log_warn("Resource group count discrepancy. Index says %d. " \
 						 "Should be %d.\n", rg, calc_rg_count);
@@ -1205,15 +1205,15 @@
 			sdp->rgcount = count1;
 		}
 		if (!error) { /* if no problems encountered with the rgs */
-			log_warn("(passed)\n");
+			log_info("(passed)\n");
 			break;  /* no reason to distrust what we saw. Otherwise, we
 					   reiterate and become a little less trusting. */
 		}
 		else {
 			if (trust_lvl < distrust)
-				log_warn("(failed--trying again at level 2)\n");
+				log_info("(failed--trying again at level 2)\n");
 			else
-				log_err("(failed--recovery impossible)\n");
+				log_info("(failed--recovery impossible)\n");
 		}
 		ri_cleanup(&sdp->rglist);
 	} /* for trust_lvl */
--- cluster/gfs/gfs_fsck/util.c	2005/03/03 19:57:03	1.1.2.3
+++ cluster/gfs/gfs_fsck/util.c	2006/06/20 14:42:56	1.1.2.4
@@ -313,3 +313,24 @@
   return NULL;
 }
 #endif
+
+/* Put out a warm, fuzzy message every second so the user     */
+/* doesn't think we hung.  (This may take a long time).       */
+void warm_fuzzy_stuff(uint64_t block)
+{
+	static struct timeval tv;
+	static uint32_t seconds = 0;
+	
+	gettimeofday(&tv, NULL);
+	if (!seconds)
+		seconds = tv.tv_sec;
+	if (tv.tv_sec - seconds) {
+		uint64_t percent;
+		
+		seconds = tv.tv_sec;
+		if (last_fs_block) {
+			percent = (block * 100) / last_fs_block;
+			log_notice("\r%" PRIu64 " percent complete.\r", percent);
+		}
+	}
+}
--- cluster/gfs/gfs_fsck/util.h	2005/03/03 19:57:03	1.1.2.2
+++ cluster/gfs/gfs_fsck/util.h	2006/06/20 14:42:56	1.1.2.3
@@ -35,5 +35,6 @@
 int next_rg_meta_free(struct fsck_rgrp *rgd, uint64 *block, int first, int *free);
 int next_rg_metatype(struct fsck_rgrp *rgd, uint64 *block, uint32 type, int first);
 struct di_info *search_list(osi_list_t *list, uint64 addr);
+void warm_fuzzy_stuff(uint64_t block);
 
 #endif /* __UTIL_H__ */




More information about the Cluster-devel mailing list