[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

[Cluster-devel] [gfs2-utils RHEL59 patch] gfs_fsck: Back-port some fixes from fsck.gfs2



Hi,

This is kinda big, so I'm posting it here before I push it to the
git repo.

This patch back-ports some patches from fsck.gfs2 to gfs_fsck
to allow gfs_fsck to better detect and fix file systems that have
invalid indirect metadata blocks.

Regards,

Bob Peterson
---
diff --git a/gfs/gfs_fsck/bio.c b/gfs/gfs_fsck/bio.c
index cd6833c..bf743e7 100644
--- a/gfs/gfs_fsck/bio.c
+++ b/gfs/gfs_fsck/bio.c
@@ -134,9 +134,6 @@ int write_buf(struct fsck_sb *sdp, osi_buf_t *bh, int flags){
 		return -1;
 	}
 
-	log_debug("Writing to %"PRIu64" - %"PRIu64" %u\n",
-		  (uint64)(BH_BLKNO(bh) * BH_SIZE(bh)),
-		  BH_BLKNO(bh), BH_SIZE(bh));
 	if(do_write(disk_fd, BH_DATA(bh), BH_SIZE(bh))) {
 		log_err("Unable to write %u bytes to position %"PRIu64"\n",
 			BH_SIZE(bh), (uint64)(BH_BLKNO(bh) * BH_SIZE(bh)));
diff --git a/gfs/gfs_fsck/metawalk.c b/gfs/gfs_fsck/metawalk.c
index e14d4dc..e485428 100644
--- a/gfs/gfs_fsck/metawalk.c
+++ b/gfs/gfs_fsck/metawalk.c
@@ -664,7 +664,7 @@ static int build_metalist(struct fsck_inode *ip, osi_list_t *mlp,
 	osi_list_t *prev_list, *cur_list, *tmp;
 	int i, head_size;
 	uint64 *ptr, block;
-	int err;
+	int error = 0, err;
 
 	if(get_and_read_buf(ip->i_sbd, ip->i_di.di_num.no_addr, &metabh)) {
 		stack;
@@ -708,13 +708,22 @@ static int build_metalist(struct fsck_inode *ip, osi_list_t *mlp,
 							   pass->private);
 				if(err < 0) {
 					stack;
+					error = err;
 					goto fail;
 				}
 				if(err > 0) {
+					if (!error)
+						error = err;
 					log_debug("Skipping block %"PRIu64
 						  "\n", block);
 					continue;
 				}
+				if (check_range(ip->i_sbd, block)) {
+					log_debug("Skipping invalid block "
+						  "%lld\n",
+						  (unsigned long long)block);
+					continue;
+				}
 				if(!nbh) {
 					if(get_and_read_buf(ip->i_sbd, block,
 							    &nbh)) {
@@ -726,7 +735,7 @@ static int build_metalist(struct fsck_inode *ip, osi_list_t *mlp,
 			}
 		}
 	}
-	return 0;
+	return error;
 
  fail:
 	for (i = 0; i < GFS_MAX_META_HEIGHT; i++) {
@@ -739,9 +748,8 @@ static int build_metalist(struct fsck_inode *ip, osi_list_t *mlp,
 			relse_buf(ip->i_sbd, bh);
 		}
 	}
-	/* This is an error path, so we need to release the buffer here: */
-	relse_buf(ip->i_sbd, metabh);
-	return -1;
+	/* No need to relse_buf on metabh because it's in the list */
+	return error;
 }
 
 /**
@@ -769,9 +777,10 @@ int check_metatree(struct fsck_inode *ip, struct metawalk_fxns *pass)
 		osi_list_init(&metalist[i]);
 
 	/* create metalist for each level */
-	if(build_metalist(ip, &metalist[0], pass)){
+	error = build_metalist(ip, &metalist[0], pass);
+	if (error) {
 		stack;
-		return -1;
+		return error;
 	}
 
 	/* We don't need to record directory blocks - they will be
@@ -1072,9 +1081,6 @@ int delete_blocks(struct fsck_inode *ip, uint64_t block, osi_buf_t **bh,
 		if (block_check(ip->i_sbd->bl, block, &q))
 			return 0;
 		if (!q.dup_block) {
-			log_info("Deleting %s block %lld as part "
-				 "of inode %lld \n",
-				 btype, block, ip->i_di.di_num.no_addr);
 			block_set(ip->i_sbd->bl, block, block_free);
 			free_block(ip->i_sbd, block);
 		}
diff --git a/gfs/gfs_fsck/pass1.c b/gfs/gfs_fsck/pass1.c
index 4b9de02..a5cac7b 100644
--- a/gfs/gfs_fsck/pass1.c
+++ b/gfs/gfs_fsck/pass1.c
@@ -35,6 +35,8 @@
 #include "link.h"
 #include "metawalk.h"
 
+#define BAD_POINTER_TOLERANCE 10
+
 struct block_count {
 	uint64_t indir_count;
 	uint64_t data_count;
@@ -101,6 +103,8 @@ static int check_metalist(struct fsck_inode *ip, uint64_t block,
 
 	*bh = NULL;
 
+	if (block == 135116188)
+		printf("here.");
 	if (check_range(ip->i_sbd, block)){ /* blk outside of FS */
 		block_set(sdp->bl, ip->i_di.di_num.no_addr, bad_block);
 		log_debug("Bad indirect block pointer (out of range).\n");
@@ -120,8 +124,11 @@ static int check_metalist(struct fsck_inode *ip, uint64_t block,
         get_and_read_buf(ip->i_sbd, block, &nbh);
 
 	if (check_meta(nbh, GFS_METATYPE_IN)){
-		log_debug("Bad indirect block pointer (points to "
-			"something that is not an indirect block).\n");
+		log_err("Inode %lld has a bad indirect block pointer %lld "
+			"(points to something that is not an "
+			"indirect block).\n",
+			(unsigned long long)ip->i_di.di_num.no_addr,
+			(unsigned long long)block);
 		if(!found_dup) {
 			block_set(sdp->bl, block, meta_inval);
 			relse_buf(ip->i_sbd, nbh);
@@ -129,14 +136,19 @@ static int check_metalist(struct fsck_inode *ip, uint64_t block,
 		}
 
 		relse_buf(ip->i_sbd, nbh);
+		nbh = NULL;
 	} else /* blk check ok */
 		*bh = nbh;
 
-	if (!found_dup) {
+	bc->indir_count++;
+	if (found_dup) {
+		if (nbh)
+			relse_buf(ip->i_sbd, nbh);
+		return 1; /* don't process the metadata again */
+	} else {
 		log_debug("Setting %" PRIu64 " to indirect block.\n", block);
 		block_set(sdp->bl, block, indir_blk);
 	}
-	bc->indir_count++;
 
 	return 0;
 }
@@ -734,6 +746,93 @@ int add_to_dir_list(struct fsck_sb *sbp, uint64_t block)
 	return 0;
 }
 
+/**
+ * Check for massive amounts of pointer corruption.  If the block has
+ * lots of out-of-range pointers, we can't trust any of the pointers.
+ * For example, a stray pointer with a value of 0x1d might be
+ * corruption/nonsense, and if so, we don't want to delete an
+ * important file (like master or the root directory) because of it.
+ * We need to check for a large number of bad pointers BEFORE we start
+ * messing with them because we don't want to mark a block as a
+ * duplicate (for example) until we know if the pointers in general can
+ * be trusted. Thus it needs to be in a separate loop.
+ */
+static int rangecheck_block(struct fsck_inode *ip, uint64_t block,
+			    osi_buf_t **bh, const char *btype, void *private)
+{
+	long *bad_pointers = (long *)private;
+	struct block_query q = {0};
+
+	if (check_range(ip->i_sbd, block) != 0) {
+		(*bad_pointers)++;
+		log_debug("Bad %s block pointer (out of range #%ld) "
+			  "found in inode %lld.\n", btype, *bad_pointers,
+			   (unsigned long long)ip->i_di.di_num.no_addr);
+		if ((*bad_pointers) <= BAD_POINTER_TOLERANCE)
+			return ENOENT;
+		else
+			return -ENOENT; /* Exits check_metatree quicker */
+	}
+	/* See how many duplicate blocks it has */
+	if(block_check(ip->i_sbd->bl, block, &q)) {
+		stack;
+		return -1;
+	}
+	if (q.block_type != block_free) {
+		(*bad_pointers)++;
+		log_debug("Duplicated %s block pointer (violation #%ld) "
+			  "found in inode %lld.\n", btype, *bad_pointers,
+			   (unsigned long long)ip->i_di.di_num.no_addr);
+		if ((*bad_pointers) <= BAD_POINTER_TOLERANCE)
+			return ENOENT;
+		else
+			return -ENOENT; /* Exits check_metatree quicker */
+	}
+	return 0;
+}
+
+static int rangecheck_metadata(struct fsck_inode *ip, uint64_t block,
+			       osi_buf_t **bh, void *private)
+{
+	return rangecheck_block(ip, block, bh, "metadata", private);
+}
+
+static int rangecheck_leaf(struct fsck_inode *ip, uint64_t block,
+			   osi_buf_t *bh, void *private)
+{
+	return rangecheck_block(ip, block, &bh, "leaf", private);
+}
+
+static int rangecheck_data(struct fsck_inode *ip, uint64_t block,
+			   struct fsck_rgrp **rgd, void *private)
+{
+	return rangecheck_block(ip, block, NULL, "data", private);
+}
+
+static int rangecheck_eattr_indir(struct fsck_inode *ip, uint64_t block,
+				  uint64_t parent, osi_buf_t **bh,
+				  void *private)
+{
+	return rangecheck_block(ip, block, NULL, "indirect extended attribute",
+				private);
+}
+
+static int rangecheck_eattr_leaf(struct fsck_inode *ip, uint64_t block,
+				 uint64_t parent, osi_buf_t **bh,
+				 void *private)
+{
+	return rangecheck_block(ip, block, NULL, "extended attribute",
+				private);
+}
+
+struct metawalk_fxns rangecheck_fxns = {
+        .private = NULL,
+        .check_metalist = rangecheck_metadata,
+        .check_data = rangecheck_data,
+        .check_leaf = rangecheck_leaf,
+        .check_eattr_indir = rangecheck_eattr_indir,
+        .check_eattr_leaf = rangecheck_eattr_leaf,
+};
 
 static int handle_di(struct fsck_sb *sdp, osi_buf_t *bh, uint64_t block, int mfree)
 {
@@ -741,6 +840,7 @@ static int handle_di(struct fsck_sb *sdp, osi_buf_t *bh, uint64_t block, int mfr
 	struct fsck_inode *ip;
 	int error;
 	struct block_count bc = {0};
+	long bad_pointers = 0L;
 	struct metawalk_fxns invalidate_metatree = {0};
 	invalidate_metatree.check_metalist = clear_metalist;
 	invalidate_metatree.check_data = clear_data;
@@ -823,6 +923,25 @@ static int handle_di(struct fsck_sb *sdp, osi_buf_t *bh, uint64_t block, int mfr
 		goto success;
 	}
 
+
+	/* First, check the metadata for massive amounts of pointer corruption.
+	   Such corruption can only lead us to ruin trying to clean it up,
+	   so it's better to check it up front and delete the inode if
+	   there is corruption. */
+	rangecheck_fxns.private = &bad_pointers;
+	error = check_metatree(ip, &rangecheck_fxns);
+	if (bad_pointers > BAD_POINTER_TOLERANCE) {
+		log_err("Error: inode %llu has more than %d bad pointers.\n",
+			 (unsigned long long)ip->i_di.di_num.no_addr,
+			 BAD_POINTER_TOLERANCE);
+		if (block_set(sdp->bl, block, meta_inval)) {
+			stack;
+			goto fail;
+		}
+		fs_set_bitmap(sdp, block, GFS_BLKST_FREE);
+		goto success;
+	}
+
 	switch(ip->i_di.di_type) {
 
 	case GFS_FILE_DIR:
@@ -930,8 +1049,9 @@ static int handle_di(struct fsck_sb *sdp, osi_buf_t *bh, uint64_t block, int mfr
 		return 0;
 	}
 	if(error > 0) {
-		log_warn("Marking inode %lld invalid\n",
-			 ip->i_di.di_num.no_addr);
+		log_err("Error: inode %llu has unrecoverable errors; "
+			"invalidating.\n",
+			(unsigned long long)ip->i_di.di_num.no_addr);
 		/* FIXME: Must set all leaves invalid as well */
 		check_metatree(ip, &invalidate_metatree);
 		block_set(ip->i_sbd->bl, ip->i_di.di_num.no_addr, meta_inval);
diff --git a/gfs/gfs_fsck/util.c b/gfs/gfs_fsck/util.c
index bb53d8c..6d2a9a2 100644
--- a/gfs/gfs_fsck/util.c
+++ b/gfs/gfs_fsck/util.c
@@ -107,11 +107,8 @@ int check_meta(osi_buf_t *bh, int type){
  
   check_magic = gfs32_to_cpu(check_magic);
   check_type = gfs32_to_cpu(check_type);
-  if((check_magic != GFS_MAGIC) || (type && (check_type != type))){
-	  log_debug("For %"PRIu64" Expected %X:%X - got %X:%X\n", BH_BLKNO(bh), GFS_MAGIC, type,
-		    check_magic, check_type);
-    return -1;
-  }
+  if((check_magic != GFS_MAGIC) || (type && (check_type != type)))
+	  return -1;
   return 0;
 }
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]