[Cluster-devel] cluster/gfs/gfs_fsck fs_recovery.c fsck.h init ...
rpeterso at sourceware.org
rpeterso at sourceware.org
Tue Jun 20 14:43:01 UTC 2006
CVSROOT: /cvs/cluster
Module name: cluster
Branch: RHEL4
Changes by: rpeterso at sourceware.org 2006-06-20 14:42:56
Modified files:
gfs/gfs_fsck : fs_recovery.c fsck.h initialize.c log.c main.c
pass1.c pass2.c pass3.c pass4.c pass5.c super.c
util.c util.h
Log message:
This addresses bugzilla bug #156009 - gfs fsck needs a good
review of logging. I improved logging and added some "warm
fuzzy" feel-good messages to let the users know it's not hung.
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/fs_recovery.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.2&r2=1.1.2.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/fsck.h.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.2.2.2&r2=1.2.2.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/initialize.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.7&r2=1.1.2.8
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/log.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.2.2.3&r2=1.2.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/main.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.2&r2=1.1.2.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/pass1.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.8&r2=1.1.2.9
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/pass2.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.3.2.4&r2=1.3.2.5
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/pass3.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/pass4.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.5&r2=1.1.2.6
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/pass5.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.6&r2=1.1.2.7
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/super.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.2.2.6&r2=1.2.2.7
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/util.c.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.3&r2=1.1.2.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/util.h.diff?cvsroot=cluster&only_with_tag=RHEL4&r1=1.1.2.2&r2=1.1.2.3
--- cluster/gfs/gfs_fsck/fs_recovery.c 2005/06/15 16:10:19 1.1.2.2
+++ cluster/gfs/gfs_fsck/fs_recovery.c 2006/06/20 14:42:56 1.1.2.3
@@ -75,15 +75,15 @@
* Returns: 0 on success, -1 on failure
*/
int reconstruct_journals(struct fsck_sb *sdp){
- int i;
+ int i;
- log_warn("Clearing journals (this may take a while)\n");
- for(i=0; i < sdp->journals; i++) {
- if((i % 10) == 0)
- log_at_notice(".");
- if(reconstruct_single_journal(sdp, i))
- return -1;
- }
- log_notice("Cleared journals\n");
- return 0;
+ log_notice("Clearing journals (this may take a while)");
+ for(i=0; i < sdp->journals; i++) {
+ if((i % 2) == 0)
+ log_at_notice(".");
+ if(reconstruct_single_journal(sdp, i))
+ return -1;
+ }
+ log_notice("\nJournals cleared.\n");
+ return 0;
}
--- cluster/gfs/gfs_fsck/fsck.h 2005/06/17 16:18:33 1.2.2.2
+++ cluster/gfs/gfs_fsck/fsck.h 2006/06/20 14:42:56 1.2.2.3
@@ -26,7 +26,7 @@
int no:1;
};
-
+extern uint64_t last_fs_block;
int initialize(struct fsck_sb *sbp);
void destroy(struct fsck_sb *sbp);
--- cluster/gfs/gfs_fsck/initialize.c 2005/12/15 21:36:42 1.1.2.7
+++ cluster/gfs/gfs_fsck/initialize.c 2006/06/20 14:42:56 1.1.2.8
@@ -98,6 +98,7 @@
{
uint32_t i;
+ log_info("Freeing buffers.\n");
if(sdp->riinode){
free(sdp->riinode);
sdp->riinode = NULL;
@@ -108,7 +109,7 @@
}
if(sdp->rooti){
free(sdp->rooti);
- sdp->rooti=NULL;
+ sdp->rooti = NULL;
}
if(sdp->jindex){
@@ -196,6 +197,7 @@
rmin = ri->ri_data1;
}
+ last_fs_block = rmax;
for (i = 0; i < sdp->journals; i++)
{
@@ -406,6 +408,7 @@
log_warn("Unable to unblock other mounters - manual intevention required\n");
log_warn("Use 'gfs_tool sb <device> proto' to fix\n");
}
+ log_info("Syncing the device.\n");
fsync(sbp->diskfd);
}
empty_super_block(sbp);
--- cluster/gfs/gfs_fsck/log.c 2005/06/15 16:10:19 1.2.2.3
+++ cluster/gfs/gfs_fsck/log.c 2006/06/20 14:42:56 1.2.2.4
@@ -45,11 +45,13 @@
case MSG_DEBUG:
printf("(%s:%d)\t", file, line);
vprintf(format, args);
+ fflush(stdout);
break;
case MSG_INFO:
case MSG_NOTICE:
case MSG_WARN:
vprintf(format, args);
+ fflush(stdout);
break;
case MSG_ERROR:
case MSG_CRITICAL:
--- cluster/gfs/gfs_fsck/main.c 2005/02/22 22:01:06 1.1.2.2
+++ cluster/gfs/gfs_fsck/main.c 2006/06/20 14:42:56 1.1.2.3
@@ -20,6 +20,8 @@
#include "fsck.h"
#include "log.h"
+uint64_t last_fs_block;
+
void print_map(struct block_list *il, int count)
{
int i, j;
@@ -128,40 +130,41 @@
log_notice("Starting pass1\n");
if (pass1(sbp))
return 1;
- log_notice("Pass1 complete\n");
+ log_notice("Pass1 complete \n");
log_notice("Starting pass1b\n");
if(pass1b(sbp))
return 1;
- log_notice("Pass1b complete\n");
+ log_notice("Pass1b complete \n");
log_notice("Starting pass1c\n");
if(pass1c(sbp))
return 1;
- log_notice("Pass1c complete\n");
+ log_notice("Pass1c complete \n");
log_notice("Starting pass2\n");
if (pass2(sbp, &opts))
return 1;
- log_notice("Pass2 complete\n");
+ log_notice("Pass2 complete \n");
log_notice("Starting pass3\n");
if (pass3(sbp, &opts))
return 1;
- log_notice("Pass3 complete\n");
+ log_notice("Pass3 complete \n");
log_notice("Starting pass4\n");
if (pass4(sbp, &opts))
return 1;
- log_notice("Pass4 complete\n");
+ log_notice("Pass4 complete \n");
log_notice("Starting pass5\n");
if (pass5(sbp, &opts))
return 1;
- log_notice("Pass5 complete\n");
+ log_notice("Pass5 complete \n");
/* print_map(sbp->bl, sbp->last_fs_block); */
+ log_notice("Writing changes to disk\n");
destroy(sbp);
return 0;
--- cluster/gfs/gfs_fsck/pass1.c 2006/04/20 20:43:41 1.1.2.8
+++ cluster/gfs/gfs_fsck/pass1.c 2006/06/20 14:42:56 1.1.2.9
@@ -883,7 +883,8 @@
stack;
return -1;
}
- log_debug("RG at %"PRIu64" is %u long\n", rgd->rd_ri.ri_addr, rgd->rd_ri.ri_length);
+ log_debug("RG at %"PRIu64" is %u long\n", rgd->rd_ri.ri_addr,
+ rgd->rd_ri.ri_length);
for (i = 0; i < rgd->rd_ri.ri_length; i++) {
if(block_set(sbp->bl, rgd->rd_ri.ri_addr + i,
meta_other)){
@@ -903,7 +904,7 @@
if(next_rg_meta_free(rgd, &block, first, &mfree))
break;
-
+ warm_fuzzy_stuff(block);
if(get_and_read_buf(sbp, block, &bh, 0)){
stack;
log_crit("Unable to retrieve block %"PRIu64
--- cluster/gfs/gfs_fsck/pass2.c 2005/03/07 22:06:01 1.3.2.4
+++ cluster/gfs/gfs_fsck/pass2.c 2006/06/20 14:42:56 1.3.2.5
@@ -808,6 +808,7 @@
}
int error = 0;
+ log_info("Checking directory inodes.\n");
/* Grab each directory inode, and run checks on it */
for(i = 0; i < sbp->last_fs_block; i++) {
@@ -824,7 +825,7 @@
if(q.block_type != inode_dir)
continue;
- log_info("Checking directory inode at block %"PRIu64"\n", i);
+ log_debug("Checking directory inode at block %"PRIu64"\n", i);
memset(&ds, 0, sizeof(ds));
--- cluster/gfs/gfs_fsck/pass3.c 2005/03/03 19:57:03 1.1.2.3
+++ cluster/gfs/gfs_fsck/pass3.c 2006/06/20 14:42:56 1.1.2.4
@@ -205,6 +205,7 @@
* until we find one that's been checked already. If we don't
* find a parent, put in lost+found.
*/
+ log_info("Checking directory linkage.\n");
for(i = 0; i < FSCK_HASH_SIZE; i++) {
osi_list_foreach(tmp, &sbp->dir_hash[i]) {
di = osi_list_entry(tmp, struct dir_info, list);
@@ -270,8 +271,8 @@
break;
}
else {
- log_info("Directory at block %"PRIu64
- " connected\n", di->dinode);
+ log_debug("Directory at block %" PRIu64 " connected\n",
+ di->dinode);
}
di = tdi;
}
--- cluster/gfs/gfs_fsck/pass4.c 2005/03/07 22:06:01 1.1.2.5
+++ cluster/gfs/gfs_fsck/pass4.c 2006/06/20 14:42:56 1.1.2.6
@@ -59,8 +59,8 @@
(ii->inode == sbp->sb.sb_quota_di.no_addr) ||
(ii->inode == sbp->sb.sb_license_di.no_addr))
continue;
- log_info("Checking reference count on inode at block %"PRIu64
- "\n", ii->inode);
+ log_debug("Checking reference count on inode at block %"PRIu64
+ "\n", ii->inode);
if(ii->counted_links == 0) {
log_err("Found unlinked inode at %"PRIu64"\n",
ii->inode);
@@ -174,6 +174,7 @@
if(sbp->lf_dip)
log_debug("At beginning of pass4, l+f entries is %u\n",
sbp->lf_dip->i_di.di_entries);
+ log_info("Checking inode reference counts.\n");
for (i = 0; i < FSCK_HASH_SIZE; i++) {
list = &sbp->inode_hash[i];
if(scan_inode_list(sbp, list)) {
--- cluster/gfs/gfs_fsck/pass5.c 2006/04/20 20:43:41 1.1.2.6
+++ cluster/gfs/gfs_fsck/pass5.c 2006/06/20 14:42:56 1.1.2.7
@@ -17,6 +17,7 @@
#include "ondisk.h"
#include "fs_bits.h"
#include "bio.h"
+#include "util.h"
#ifdef DEBUG
int rgrp_countbits(unsigned char *buffer, unsigned int buflen,
@@ -191,6 +192,8 @@
while(byte < end) {
rg_status = ((*byte >> bit) & GFS_BIT_MASK);
block = rg_data + *rg_block;
+ log_debug("Checking block %" PRIu64 "\n", block);
+ warm_fuzzy_stuff(block);
block_check(sbp->bl, block, &q);
block_status = convert_mark(q.block_type, count);
--- cluster/gfs/gfs_fsck/super.c 2006/04/20 20:43:41 1.2.2.6
+++ cluster/gfs/gfs_fsck/super.c 2006/06/20 14:42:56 1.2.2.7
@@ -1081,9 +1081,9 @@
gfs_grow or something. Count the RGs by hand. */
} trust_lvl;
- log_warn("Validating Resource Group index.\n");
+ log_info("Validating Resource Group index.\n");
for (trust_lvl = blind_faith; trust_lvl <= distrust; trust_lvl++) {
- log_warn("Level %d check.\n", trust_lvl + 1);
+ log_info("Level %d check.\n", trust_lvl + 1);
count1 = count2 = 0;
/* ---------------------------------------------------------------- */
/* Step 1 - Calculate or figure out our own RG index */
@@ -1097,7 +1097,7 @@
error = gfs_rgindex_calculate(sdp, &expected_rglist,
&calc_rg_count);
if (error) { /* If calculated RGs don't reasonably match the fs */
- log_warn("(failed--trying again at level 3)\n");
+ log_info("(failed--trying again at level 3)\n");
ri_cleanup(&sdp->rglist);
continue; /* Try again, this time counting them manually */
}
@@ -1106,7 +1106,7 @@
error = gfs_rgindex_rebuild(sdp, &expected_rglist,
&calc_rg_count); /* count the RGs. */
if (error) { /* If calculated RGs don't reasonably match the fs */
- log_err("(failed--giving up)\n");
+ log_info("(failed--giving up)\n");
goto fail; /* try again, this time counting them manually */
}
}
@@ -1177,7 +1177,7 @@
count1++;
} /* for all RGs in the index */
if (!error) {
- log_warn("%u resource groups found.\n", rg);
+ log_info("%u resource groups found.\n", rg);
if (trust_lvl != blind_faith && rg != calc_rg_count)
log_warn("Resource group count discrepancy. Index says %d. " \
"Should be %d.\n", rg, calc_rg_count);
@@ -1205,15 +1205,15 @@
sdp->rgcount = count1;
}
if (!error) { /* if no problems encountered with the rgs */
- log_warn("(passed)\n");
+ log_info("(passed)\n");
break; /* no reason to distrust what we saw. Otherwise, we
reiterate and become a little less trusting. */
}
else {
if (trust_lvl < distrust)
- log_warn("(failed--trying again at level 2)\n");
+ log_info("(failed--trying again at level 2)\n");
else
- log_err("(failed--recovery impossible)\n");
+ log_info("(failed--recovery impossible)\n");
}
ri_cleanup(&sdp->rglist);
} /* for trust_lvl */
--- cluster/gfs/gfs_fsck/util.c 2005/03/03 19:57:03 1.1.2.3
+++ cluster/gfs/gfs_fsck/util.c 2006/06/20 14:42:56 1.1.2.4
@@ -313,3 +313,24 @@
return NULL;
}
#endif
+
+/* Put out a warm, fuzzy message every second so the user */
+/* doesn't think we hung. (This may take a long time). */
+void warm_fuzzy_stuff(uint64_t block)
+{
+ static struct timeval tv;
+ static uint32_t seconds = 0;
+
+ gettimeofday(&tv, NULL);
+ if (!seconds)
+ seconds = tv.tv_sec;
+ if (tv.tv_sec - seconds) {
+ uint64_t percent;
+
+ seconds = tv.tv_sec;
+ if (last_fs_block) {
+ percent = (block * 100) / last_fs_block;
+ log_notice("\r%" PRIu64 " percent complete.\r", percent);
+ }
+ }
+}
--- cluster/gfs/gfs_fsck/util.h 2005/03/03 19:57:03 1.1.2.2
+++ cluster/gfs/gfs_fsck/util.h 2006/06/20 14:42:56 1.1.2.3
@@ -35,5 +35,6 @@
int next_rg_meta_free(struct fsck_rgrp *rgd, uint64 *block, int first, int *free);
int next_rg_metatype(struct fsck_rgrp *rgd, uint64 *block, uint32 type, int first);
struct di_info *search_list(osi_list_t *list, uint64 addr);
+void warm_fuzzy_stuff(uint64_t block);
#endif /* __UTIL_H__ */
More information about the Cluster-devel
mailing list