[Cluster-devel] cluster/gfs/gfs_fsck fsck.h initialize.c log.c ...
rpeterso at sourceware.org
rpeterso at sourceware.org
Fri Nov 17 16:32:52 UTC 2006
CVSROOT: /cvs/cluster
Module name: cluster
Branch: STABLE
Changes by: rpeterso at sourceware.org 2006-11-17 16:32:49
Modified files:
gfs/gfs_fsck : fsck.h initialize.c log.c main.c pass1.c
pass1b.c pass1c.c pass2.c pass3.c pass4.c
pass5.c util.c
Log message:
Resolves: bz208836 - fatal: invalid metadata block
1. Fix a memory leak in pass1b.
2. Improve performance of pass1b by combining loops through fs.
3. Give an error message and abort if the file system is > 16TB and the node
architecture is 32-bit.
4. Give users "Abort", "Continue" and "Skip" options if they interrupt
with ctrl-c. Also, report progress for that pass on interrupt.
5. Added more "percent complete" messages for other passes.
See bz comment #33 for more details.
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/fsck.h.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.2.2.1.4.1.2.1&r2=1.2.2.1.4.1.2.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/initialize.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.1.2.4.4.2.2.4&r2=1.1.2.4.4.2.2.5
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/log.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.2.2.2.4.1.2.1&r2=1.2.2.2.4.1.2.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/main.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.1.2.2.6.1&r2=1.1.2.2.6.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/pass1.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.1.2.7.6.2&r2=1.1.2.7.6.3
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/pass1b.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.3.2.1.6.1&r2=1.3.2.1.6.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/pass1c.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.2.2.1.4.1.2.1&r2=1.2.2.1.4.1.2.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/pass2.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.3.2.4.6.1&r2=1.3.2.4.6.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/pass3.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.1.2.3.6.1&r2=1.1.2.3.6.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/pass4.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.1.2.5.6.1&r2=1.1.2.5.6.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/pass5.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.1.2.4.6.3&r2=1.1.2.4.6.4
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/util.c.diff?cvsroot=cluster&only_with_tag=STABLE&r1=1.1.2.3.6.1&r2=1.1.2.3.6.2
--- cluster/gfs/gfs_fsck/fsck.h 2006/06/21 14:21:21 1.2.2.1.4.1.2.1
+++ cluster/gfs/gfs_fsck/fsck.h 2006/11/17 16:32:49 1.2.2.1.4.1.2.2
@@ -26,7 +26,8 @@
int no:1;
};
-extern uint64_t last_fs_block;
+extern uint64_t last_fs_block, last_reported_block;
+extern int skip_this_pass, fsck_abort, fsck_query;
int initialize(struct fsck_sb *sbp);
void destroy(struct fsck_sb *sbp);
--- cluster/gfs/gfs_fsck/initialize.c 2006/09/20 16:37:56 1.1.2.4.4.2.2.4
+++ cluster/gfs/gfs_fsck/initialize.c 2006/11/17 16:32:49 1.1.2.4.4.2.2.5
@@ -210,6 +210,12 @@
}
sdp->last_fs_block = (jmax > rmax) ? jmax : rmax;
+ if (sdp->last_fs_block > 0xffffffff && sizeof(unsigned long) <= 4) {
+ log_crit("This file system is too big for this computer to handle.\n");
+ log_crit("Last fs block = 0x%llx, but sizeof(unsigned long) is %d bytes.\n",
+ sdp->last_fs_block, sizeof(unsigned long));
+ goto fail;
+ }
sdp->last_data_block = rmax;
sdp->first_data_block = rmin;
--- cluster/gfs/gfs_fsck/log.c 2006/06/21 14:21:21 1.2.2.2.4.1.2.1
+++ cluster/gfs/gfs_fsck/log.c 2006/11/17 16:32:49 1.2.2.2.4.1.2.2
@@ -102,6 +102,7 @@
if(sbp->opts->no)
return 0;
+ fsck_query = TRUE;
/* Watch stdin (fd 0) to see when it has input. */
FD_ZERO(&rfds);
FD_SET(STDIN_FILENO, &rfds);
@@ -149,5 +150,6 @@
read(STDIN_FILENO, &response, sizeof(char));
}
+ fsck_query = FALSE;
return ret;
}
--- cluster/gfs/gfs_fsck/main.c 2006/06/21 14:21:21 1.1.2.2.6.1
+++ cluster/gfs/gfs_fsck/main.c 2006/11/17 16:32:49 1.1.2.2.6.2
@@ -14,13 +14,17 @@
#include <stdio.h>
#include <stdint.h>
#include <libgen.h>
+#include <ctype.h>
+#include <signal.h>
#include "copyright.cf"
#include "fsck_incore.h"
#include "fsck.h"
#include "log.h"
-uint64_t last_fs_block;
+uint64_t last_fs_block, last_reported_block = -1;
+int skip_this_pass = FALSE, fsck_abort = FALSE, fsck_query = FALSE;
+const char *pass = "";
void print_map(struct block_list *il, int count)
{
@@ -110,6 +114,61 @@
return 0;
}
+void interrupt(int sig)
+{
+ fd_set rfds;
+ struct timeval tv;
+ char response;
+ int err;
+
+ if (fsck_query) /* if we're asking them a question */
+ return; /* ignore the interrupt signal */
+ FD_ZERO(&rfds);
+ FD_SET(STDIN_FILENO, &rfds);
+
+ tv.tv_sec = 0;
+ tv.tv_usec = 0;
+ /* Make sure there isn't extraneous input before asking the
+ * user the question */
+ while((err = select(STDIN_FILENO + 1, &rfds, NULL, NULL, &tv))) {
+ if(err < 0) {
+ log_debug("Error in select() on stdin\n");
+ break;
+ }
+ read(STDIN_FILENO, &response, sizeof(char));
+ }
+ while (TRUE) {
+ printf("\ngfs_fsck interrupted in %s: ", pass);
+ if (!last_reported_block || last_reported_block == last_fs_block)
+ printf("progress unknown.\n");
+ else
+ printf("processing block %" PRIu64 " out of %" PRIu64 "\n",
+ last_reported_block, last_fs_block);
+ printf("Do you want to abort gfs_fsck, skip the rest of %s or continue (a/s/c)?", pass);
+
+ /* Make sure query is printed out */
+ fflush(stdout);
+ read(STDIN_FILENO, &response, sizeof(char));
+
+ if(tolower(response) == 's') {
+ skip_this_pass = TRUE;
+ return;
+ }
+ else if (tolower(response) == 'a') {
+ fsck_abort = TRUE;
+ return;
+ }
+ else if (tolower(response) == 'c')
+ return;
+ else {
+ while(response != '\n')
+ read(STDIN_FILENO, &response, sizeof(char));
+ printf("Bad response, please type 'c', 'a' or 's'.\n");
+ continue;
+ }
+ }
+}
+
int main(int argc, char **argv)
{
struct fsck_sb sb;
@@ -127,44 +186,100 @@
if (initialize(sbp))
return 1;
+ signal(SIGINT, interrupt);
log_notice("Starting pass1\n");
+ pass = "pass 1";
+ last_reported_block = 0;
if (pass1(sbp))
return 1;
- log_notice("Pass1 complete \n");
-
- log_notice("Starting pass1b\n");
- if(pass1b(sbp))
- return 1;
- log_notice("Pass1b complete \n");
-
- log_notice("Starting pass1c\n");
- if(pass1c(sbp))
- return 1;
- log_notice("Pass1c complete \n");
-
- log_notice("Starting pass2\n");
- if (pass2(sbp, &opts))
- return 1;
- log_notice("Pass2 complete \n");
-
- log_notice("Starting pass3\n");
- if (pass3(sbp, &opts))
- return 1;
- log_notice("Pass3 complete \n");
-
- log_notice("Starting pass4\n");
- if (pass4(sbp, &opts))
- return 1;
- log_notice("Pass4 complete \n");
-
- log_notice("Starting pass5\n");
- if (pass5(sbp, &opts))
- return 1;
- log_notice("Pass5 complete \n");
+ if (skip_this_pass || fsck_abort) {
+ skip_this_pass = FALSE;
+ log_notice("Pass1 interrupted \n");
+ }
+ else
+ log_notice("Pass1 complete \n");
-/* print_map(sbp->bl, sbp->last_fs_block); */
+ if (!fsck_abort) {
+ last_reported_block = 0;
+ pass = "pass 1b";
+ log_notice("Starting pass1b\n");
+ if(pass1b(sbp))
+ return 1;
+ if (skip_this_pass || fsck_abort) {
+ skip_this_pass = FALSE;
+ log_notice("Pass1b interrupted \n");
+ }
+ else
+ log_notice("Pass1b complete \n");
+ }
+ if (!fsck_abort) {
+ last_reported_block = 0;
+ pass = "pass 1c";
+ log_notice("Starting pass1c\n");
+ if(pass1c(sbp))
+ return 1;
+ if (skip_this_pass || fsck_abort) {
+ skip_this_pass = FALSE;
+ log_notice("Pass1c interrupted \n");
+ }
+ else
+ log_notice("Pass1c complete \n");
+ }
+ if (!fsck_abort) {
+ last_reported_block = 0;
+ pass = "pass 2";
+ log_notice("Starting pass2\n");
+ if (pass2(sbp, &opts))
+ return 1;
+ if (skip_this_pass || fsck_abort) {
+ skip_this_pass = FALSE;
+ log_notice("Pass2 interrupted \n");
+ }
+ else
+ log_notice("Pass2 complete \n");
+ }
+ if (!fsck_abort) {
+ last_reported_block = 0;
+ pass = "pass 3";
+ log_notice("Starting pass3\n");
+ if (pass3(sbp, &opts))
+ return 1;
+ if (skip_this_pass || fsck_abort) {
+ skip_this_pass = FALSE;
+ log_notice("Pass3 interrupted \n");
+ }
+ else
+ log_notice("Pass3 complete \n");
+ }
+ if (!fsck_abort) {
+ last_reported_block = 0;
+ pass = "pass 4";
+ log_notice("Starting pass4\n");
+ if (pass4(sbp, &opts))
+ return 1;
+ if (skip_this_pass || fsck_abort) {
+ skip_this_pass = FALSE;
+ log_notice("Pass4 interrupted \n");
+ }
+ else
+ log_notice("Pass4 complete \n");
+ }
+ if (!fsck_abort) {
+ last_reported_block = 0;
+ pass = "pass 5";
+ log_notice("Starting pass5\n");
+ if (pass5(sbp, &opts))
+ return 1;
+ if (skip_this_pass || fsck_abort) {
+ skip_this_pass = FALSE;
+ log_notice("Pass5 interrupted \n");
+ }
+ else
+ log_notice("Pass5 complete \n");
+ /* print_map(sbp->bl, sbp->last_fs_block); */
- log_notice("Writing changes to disk\n");
+ log_notice("Writing changes to disk\n");
+ }
destroy(sbp);
return 0;
--- cluster/gfs/gfs_fsck/pass1.c 2006/06/21 14:21:21 1.1.2.7.6.2
+++ cluster/gfs/gfs_fsck/pass1.c 2006/11/17 16:32:49 1.1.2.7.6.3
@@ -905,6 +905,13 @@
break;
warm_fuzzy_stuff(block);
+ if (fsck_abort) /* if asked to abort */
+ return 0;
+ if (skip_this_pass) {
+ printf("Skipping pass 1 is not a good idea.\n");
+ skip_this_pass = FALSE;
+ fflush(stdout);
+ }
if(get_and_read_buf(sbp, block, &bh, 0)){
stack;
log_crit("Unable to retrieve block %"PRIu64
--- cluster/gfs/gfs_fsck/pass1b.c 2005/07/11 18:24:40 1.3.2.1.6.1
+++ cluster/gfs/gfs_fsck/pass1b.c 2006/11/17 16:32:49 1.3.2.1.6.2
@@ -470,6 +470,7 @@
osi_list_t *tmp;
struct metawalk_fxns find_dirents = {0};
find_dirents.check_dentry = &find_dentry;
+ int rc = 0;
osi_list_init(&sbp->dup_list);
/* Shove all blocks marked as duplicated into a list */
@@ -487,10 +488,14 @@
log_info("Scanning filesystem for inodes containing duplicate blocks...\n");
log_debug("Filesystem has %"PRIu64" blocks total\n", sbp->last_fs_block);
for(i = 0; i < sbp->last_fs_block; i += 1) {
+ warm_fuzzy_stuff(i);
+ if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
+ goto out;
log_debug("Scanning block %"PRIu64" for inodes\n", i);
if(block_check(sbp->bl, i, &q)) {
stack;
- return -1;
+ rc = -1;
+ goto out;
}
if((q.block_type == inode_dir) ||
(q.block_type == inode_file) ||
@@ -503,33 +508,27 @@
b = osi_list_entry(tmp, struct blocks, list);
if(find_block_ref(sbp, i, b)) {
stack;
- return -1;
+ rc = -1;
+ goto out;
}
}
}
- }
-
- /* Rescan the fs looking for directory entries to the inodes
- * with duplicate blocks - might need this to deal with the
- * inode correctly */
- log_info("Looking through directory entries for inodes with duplicate blocks...\n");
- for(i = 0; i < sbp->last_fs_block; i++) {
- if(block_check(sbp->bl, i, &q)) {
- stack;
- return 0;
- }
if(q.block_type == inode_dir) {
check_dir(sbp, i, &find_dirents);
}
}
-
/* Fix dups here - it's going to slow things down a lot to fix
* it later */
log_info("Handling duplicate blocks\n");
- osi_list_foreach(tmp, &sbp->dup_list) {
- b = osi_list_entry(tmp, struct blocks, list);
- handle_dup_blk(sbp, b);
+out:
+ /*osi_list_foreach(tmp, &sbp->dup_list) {*/
+ while (!osi_list_empty(&sbp->dup_list)) {
+ b = osi_list_entry(sbp->dup_list.next, struct blocks, list);
+ if (!skip_this_pass && !rc) /* no error & not asked to skip the rest */
+ handle_dup_blk(sbp, b);
+ osi_list_del(&b->list);
+ free(b);
}
- return 0;
+ return rc;
}
--- cluster/gfs/gfs_fsck/pass1c.c 2006/02/02 01:16:10 1.2.2.1.4.1.2.1
+++ cluster/gfs/gfs_fsck/pass1c.c 2006/11/17 16:32:49 1.2.2.1.4.1.2.2
@@ -252,6 +252,8 @@
log_info("Looking for inodes containing ea blocks...\n");
while (!find_next_block_type(sbp->bl, eattr_block, &block_no)) {
+ if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
+ return 0;
log_info("EA in inode %"PRIu64"\n", block_no);
if(get_and_read_buf(sbp, block_no, &bh, 0)) {
stack;
--- cluster/gfs/gfs_fsck/pass2.c 2006/06/21 14:21:21 1.3.2.4.6.1
+++ cluster/gfs/gfs_fsck/pass2.c 2006/11/17 16:32:49 1.3.2.4.6.2
@@ -812,6 +812,10 @@
/* Grab each directory inode, and run checks on it */
for(i = 0; i < sbp->last_fs_block; i++) {
+ warm_fuzzy_stuff(i);
+ if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
+ return 0;
+
/* Skip the root inode - it's checked above */
if(i == sbp->sb.sb_root_di.no_addr)
continue;
--- cluster/gfs/gfs_fsck/pass3.c 2006/06/21 14:21:21 1.1.2.3.6.1
+++ cluster/gfs/gfs_fsck/pass3.c 2006/11/17 16:32:49 1.1.2.3.6.2
@@ -213,6 +213,8 @@
/* FIXME: Change this so it returns success or
* failure and put the parent inode in a
* param */
+ if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
+ return 0;
tdi = mark_and_return_parent(sbp, di);
/* FIXME: Factor this ? */
--- cluster/gfs/gfs_fsck/pass4.c 2006/06/21 14:21:21 1.1.2.5.6.1
+++ cluster/gfs/gfs_fsck/pass4.c 2006/11/17 16:32:49 1.1.2.5.6.2
@@ -49,6 +49,8 @@
/* FIXME: should probably factor this out into a generic
* scanning fxn */
osi_list_foreach(tmp, list) {
+ if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
+ return 0;
if(!(ii = osi_list_entry(tmp, struct inode_info, list))) {
log_crit("osi_list_foreach broken in scan_info_list!!\n");
exit(1);
@@ -176,6 +178,8 @@
sbp->lf_dip->i_di.di_entries);
log_info("Checking inode reference counts.\n");
for (i = 0; i < FSCK_HASH_SIZE; i++) {
+ if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
+ return 0;
list = &sbp->inode_hash[i];
if(scan_inode_list(sbp, list)) {
stack;
--- cluster/gfs/gfs_fsck/pass5.c 2006/06/21 14:21:21 1.1.2.4.6.3
+++ cluster/gfs/gfs_fsck/pass5.c 2006/11/17 16:32:49 1.1.2.4.6.4
@@ -194,6 +194,8 @@
block = rg_data + *rg_block;
log_debug("Checking block %" PRIu64 "\n", block);
warm_fuzzy_stuff(block);
+ if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
+ return 0;
block_check(sbp->bl, block, &q);
block_status = convert_mark(q.block_type, count);
@@ -266,6 +268,8 @@
BH_DATA(rgp->rd_bh[i]) + bits->bi_offset,
bits->bi_len, &rg_block,
rgp->rd_ri.ri_data1, count);
+ if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
+ return 0;
}
/* Compare the rgrps counters with what we found */
@@ -351,6 +355,8 @@
/* Reconcile RG bitmaps with fsck bitmap */
for(tmp = sbp->rglist.next; tmp != &sbp->rglist; tmp = tmp->next){
+ if (skip_this_pass || fsck_abort) /* if asked to skip the rest */
+ return 0;
log_info("Updating Resource Group %"PRIu64"\n", rg_count);
memset(count, 0, sizeof(*count) * 5);
rgp = osi_list_entry(tmp, struct fsck_rgrp, rd_list);
--- cluster/gfs/gfs_fsck/util.c 2006/06/21 14:21:24 1.1.2.3.6.1
+++ cluster/gfs/gfs_fsck/util.c 2006/11/17 16:32:49 1.1.2.3.6.2
@@ -321,6 +321,7 @@
static struct timeval tv;
static uint32_t seconds = 0;
+ last_reported_block = block;
gettimeofday(&tv, NULL);
if (!seconds)
seconds = tv.tv_sec;
More information about the Cluster-devel
mailing list