[Cluster-devel] cluster/gfs/gfs_fsck fs_dir.c fs_dir.h metawal ...

rpeterso at sourceware.org rpeterso at sourceware.org
Fri May 4 14:07:05 UTC 2007


CVSROOT:	/cvs/cluster
Module name:	cluster
Branch: 	RHEL5
Changes by:	rpeterso at sourceware.org	2007-05-04 14:07:03

Modified files:
	gfs/gfs_fsck   : fs_dir.c fs_dir.h metawalk.c metawalk.h pass1.c 
	                 pass2.c 

Log message:
	Resolves: bz 229484: gfs_fsck not good at fixing corrupt directory entries

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/fs_dir.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.10&r2=1.10.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/fs_dir.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.3&r2=1.3.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/metawalk.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.8.2.1&r2=1.8.2.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/metawalk.h.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.3&r2=1.3.4.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/pass1.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.14.2.1&r2=1.14.2.2
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/gfs/gfs_fsck/pass2.c.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.12.2.2&r2=1.12.2.3

--- cluster/gfs/gfs_fsck/fs_dir.c	2005/08/01 16:20:48	1.10
+++ cluster/gfs/gfs_fsck/fs_dir.c	2007/05/04 14:07:03	1.10.2.1
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -18,7 +18,8 @@
 #include "fs_inode.h"
 #include "bio.h"
 #include "link.h"
-
+#include "limits.h"
+#include "metawalk.h"
 #include "fs_dir.h"
 
 #define IS_LEAF     (1)
@@ -1633,6 +1634,34 @@
 
 
 /**
+ * put_leaf_nr - Store a leaf block number at a given index
+ * @dip: The GFS inode written to via writei()
+ * @index: Slot number; the write lands at byte index * sizeof(uint64)
+ * @leaf_out: Leaf block number in CPU byte order
+ *
+ * Returns: 0 on success, negative writei() result or -EIO otherwise
+ */
+
+int put_leaf_nr(struct fsck_inode *dip, uint32 index, uint64 leaf_out)
+{
+	/* Convert to on-disk byte order before writing. */
+	uint64 disk_leaf = cpu_to_gfs64(leaf_out);
+	int written;
+
+	written = writei(dip, (char *)&disk_leaf,
+			 index * sizeof(uint64), sizeof(uint64));
+	if (written == sizeof(uint64))
+		return 0;
+	/* Short or failed write: pass negative codes through as-is. */
+	log_debug("put_leaf_nr:  Bad internal write.  (rtn = %d)\n",
+		  written);
+	if (written < 0)
+		return written;
+	return -EIO;
+}
+
+
+/**
  * fs_filecmp - Compare two filenames
  * @file1: The first filename
  * @file2: The second filename
@@ -1685,3 +1714,52 @@
 
 	return error;
 }
+
+/**
+ * dirent_repair - attempt to repair a corrupt directory entry.
+ * @ip - The inode being checked (its i_sbd is used to write the buffer)
+ * @bh - The buffer header that contains the bad dirent
+ * @de - The directory entry in native format
+ * @dent - The directory entry in on-disk format
+ * @type - Type of directory (DIR_LINEAR or DIR_EXHASH)
+ * @first - TRUE if this is the first dirent in the buffer
+ *
+ * All we know at this point is that the length field is wrong.
+ * Returns: 0 if the entry was rewritten, 1 if no name could be found.
+ */
+int dirent_repair(struct fsck_inode *ip, osi_buf_t *bh, struct gfs_dirent *de,
+		  struct gfs_dirent *dent, int type, int first)
+{
+	char *bh_end, *p;
+	int calc_de_name_len = 0;
+
+	/* If this is a sentinel, just fix the length and move on */
+	if (first && !de->de_inum.no_formal_ino) { /* Is it a sentinel? */
+		if (type == DIR_LINEAR)
+			de->de_rec_len = BH_SIZE(bh) -
+				sizeof(struct gfs_dinode);
+		else
+			de->de_rec_len = BH_SIZE(bh) - sizeof(struct gfs_leaf);
+	}
+	else {
+		bh_end = BH_DATA(bh) + BH_SIZE(bh);
+		/* first, figure out a probable name length */
+		p = (char *)dent + sizeof(struct gfs_dirent);
+		while (p < bh_end && /* bound first: never read past buffer */
+		       *p) {         /* then stop at the first zero char */
+			calc_de_name_len++;
+			p++;
+		}
+		if (!calc_de_name_len)
+			return 1;
+		/* There can often be noise at the end, so only trust the */
+		/* scanned length if the stored one is bad or is longer.  */
+		if (!de->de_name_len || de->de_name_len > NAME_MAX ||
+		    calc_de_name_len < de->de_name_len) /* if dent is hosed */
+			de->de_name_len = calc_de_name_len; /* use ours */
+		de->de_rec_len = GFS_DIRENT_SIZE(de->de_name_len);
+	}
+	gfs_dirent_out(de, (char *)dent);
+	write_buf(ip->i_sbd, bh, 0);
+	return 0;
+}
--- cluster/gfs/gfs_fsck/fs_dir.h	2005/02/07 15:27:45	1.3
+++ cluster/gfs/gfs_fsck/fs_dir.h	2007/05/04 14:07:03	1.3.2.1
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -31,6 +31,7 @@
 int fsck_inode_is_stuffed(struct fsck_inode *ip);
 int dirent_first(osi_buf_t *bh, struct gfs_dirent **dent);
 int get_leaf_nr(struct fsck_inode *dip, uint32 index, uint64 *leaf_out);
+int put_leaf_nr(struct fsck_inode *dip, uint32 index, uint64 leaf_out);
 int fs_filecmp(osi_filename_t *file1, char *file2, int len_of_file2);
 int fs_dirent_del(struct fsck_inode *dip, osi_buf_t *bh, osi_filename_t *filename);
 int fs_dir_add(struct fsck_inode *dip, osi_filename_t *filename,
@@ -39,5 +40,7 @@
 		    int name_len, struct gfs_dirent **dent_out);
 
 int fs_dir_search(struct fsck_inode *dip, identifier_t *id, unsigned int *type);
+int dirent_repair(struct fsck_inode *ip, osi_buf_t *bh, struct gfs_dirent *de, 
+		  struct gfs_dirent *dent, int type, int first);
 
 #endif /* __FS_DIR_H__ */
--- cluster/gfs/gfs_fsck/metawalk.c	2007/02/20 18:55:34	1.8.2.1
+++ cluster/gfs/gfs_fsck/metawalk.c	2007/05/04 14:07:03	1.8.2.2
@@ -1,7 +1,7 @@
 /*****************************************************************************
 *******************************************************************************
 **
-**  Copyright (C) 2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2005-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -59,6 +59,24 @@
 		gfs_dirent_in(&de, (char *)dent);
 		filename = (char *)dent + sizeof(struct gfs_dirent);
 
+		/* Corrupt if the record can't hold header + name, or no name. */
+		if (de.de_rec_len < sizeof(struct gfs_dirent) +
+		    de.de_name_len || !de.de_name_len) {
+			log_err("Directory block %"
+				PRIu64 ", entry %d of directory %"
+				PRIu64 " is corrupt.\n", BH_BLKNO(bh),
+				(*count) + 1, ip->i_di.di_num.no_addr);
+			if (!query(ip->i_sbd, "Attempt to repair it? (y/n) ")) {
+				log_err("Corrupt directory entry %d ignored, "
+					"stopped after checking %d entries.\n",
+					(*count) + 1, *count);
+				break;
+			}
+			/* Non-zero means the repair gave up; stop scanning. */
+			if (dirent_repair(ip, bh, &de, dent, type,
+					  first))
+				break;
+		}
 		if (!de.de_inum.no_formal_ino){
 			if(first){
 				log_debug("First dirent is a sentinel (place holder).\n");
@@ -83,12 +101,6 @@
 			  }*/
 		}
 
-		if (de.de_rec_len < sizeof(struct gfs_dirent)) {
-			log_err("Entry %"PRIu64" of directory %"
-				PRIu64" is corrupt, skipping.\n",
-				BH_BLKNO(bh), ip->i_di.di_num.no_addr);
-			break;
-		}
 		if ((char *)dent + de.de_rec_len >= bh_end){
 			log_debug("Last entry processed.\n");
 			break;
@@ -109,13 +121,36 @@
 }
 
 
+/* Report a bad leaf pointer (only once per bad leaf) and offer to   */
+/* repair it by pointing the entry at `index` back at old_leaf, so   */
+/* the previous leaf replaces the bad one instead of the whole       */
+/* directory being deleted.  On return, *bad_leaf holds the bad      */
+/* block number and *leaf_no is old_leaf, patched or not.            */
+void warn_and_patch(struct fsck_inode *ip, uint64_t *leaf_no,
+		    uint64_t *bad_leaf, uint64_t old_leaf, int index,
+		    const char *msg)
+{
+	if (*bad_leaf != *leaf_no)
+		log_err("Directory Inode %" PRIu64 " points to leaf %"
+			PRIu64 " %s.\n", ip->i_di.di_num.no_addr, *leaf_no,
+			msg);
+	if (*leaf_no == *bad_leaf ||
+	    query(ip->i_sbd, "Attempt to patch around it? (y/n) ")) {
+		/* A failed index write must not pass silently. */
+		if (put_leaf_nr(ip, index, old_leaf))
+			log_err("Unable to patch around bad leaf.\n");
+	}
+	else
+		log_err("Bad leaf left in place.\n");
+	*bad_leaf = *leaf_no;
+	*leaf_no = old_leaf;
+}
 
 /* Checks exthash directory entries */
 int check_leaf(struct fsck_inode *ip, int *update, struct metawalk_fxns *pass)
 {
 	int error;
-	struct gfs_leaf leaf;
-	uint64_t leaf_no, old_leaf;
+	struct gfs_leaf leaf, oldleaf;
+	uint64_t leaf_no, old_leaf, bad_leaf = -1;
 	osi_buf_t *lbh;
 	int index;
 	struct fsck_sb *sbp = ip->i_sbd;
@@ -123,6 +158,7 @@
 	int ref_count = 0, exp_count = 0;
 
 	old_leaf = 0;
+	memset(&oldleaf, 0, sizeof(oldleaf));
 	for(index = 0; index < (1 << ip->i_di.di_depth); index++) {
 		if(get_leaf_nr(ip, index, &leaf_no)) {
 			log_err("Unable to get leaf block number in dir %"
@@ -138,7 +174,12 @@
 		/* GFS has multiple indirect pointers to the same leaf
 		 * until those extra pointers are needed, so skip the
 		 * dups */
-		if(old_leaf == leaf_no) {
+		if (leaf_no == bad_leaf) {
+			put_leaf_nr(ip, index, old_leaf); /* fill w/old leaf */
+			ref_count++;
+			continue;
+		}
+		else if(old_leaf == leaf_no) {
 			ref_count++;
 			continue;
 		} else {
@@ -150,54 +191,68 @@
 					 old_leaf,
 					 ref_count,
 					 exp_count);
-				return 1;
+				if (query(ip->i_sbd, "Attempt to fix it? (y/n) ")) {
+					int factor = 0, divisor = ref_count;
+
+					get_and_read_buf(sbp, old_leaf, &lbh,
+							 0);
+					while (divisor > 1) {
+						factor++;
+						divisor /= 2;
+					}
+					oldleaf.lf_depth = ip->i_di.di_depth -
+						factor;
+					gfs_leaf_out(&oldleaf, BH_DATA(lbh));
+					write_buf(sbp, lbh, 0);
+					relse_buf(sbp, lbh);
+				}
+				else
+					return 1;
 			}
 			ref_count = 1;
 		}
 
 		count = 0;
 		do {
-			/* FIXME: Do other checks (see old
-			 * pass3:dir_exhash_scan() */
-			lbh = NULL;
-			if(pass->check_leaf) {
-				error = pass->check_leaf(ip, leaf_no, &lbh,
-							 pass->private);
-				if(error < 0) {
-					stack;
-					relse_buf(sbp, lbh);
-					return -1;
-				}
-				if(error > 0) {
-					relse_buf(sbp, lbh);
-					lbh = NULL;
-					return 1;
-				}
+			/* Make sure the block number is in range. */
+			if(check_range(ip->i_sbd, leaf_no)){
+				log_err("Leaf block #%"PRIu64" is out of "
+					"range for directory #%"PRIu64".\n",
+					leaf_no, ip->i_di.di_num.no_addr);
+				warn_and_patch(ip, &leaf_no, &bad_leaf,
+					       old_leaf, index,
+					       "that is out of range");
+				memcpy(&leaf, &oldleaf, sizeof(oldleaf));
+				break;
 			}
-
-			if (!lbh){
-				if(get_and_read_buf(sbp, leaf_no,
-						    &lbh, 0)){
-					log_err("Unable to read leaf block #%"
-						PRIu64" for "
-						"directory #%"PRIu64".\n",
-						leaf_no,
-						ip->i_di.di_num.no_addr);
-					/* FIXME: should i error out
-					 * if this fails? */
-					break;
-				}
+			/* Try to read in the leaf block. */
+			if(get_and_read_buf(sbp, leaf_no, &lbh, 0)){
+				log_err("Unable to read leaf block #%"
+					PRIu64" for "
+					"directory #%"PRIu64".\n",
+					leaf_no, ip->i_di.di_num.no_addr);
+				warn_and_patch(ip, &leaf_no, &bad_leaf,
+					       old_leaf, index,
+					       "that cannot be read");
+				memcpy(&leaf, &oldleaf, sizeof(oldleaf));
+				relse_buf(sbp, lbh);
+				break;
 			}
-			gfs_leaf_in(&leaf, BH_DATA(lbh));
-
-			/* Make sure it's really a leaf. */
-			if (leaf.lf_header.mh_type != GFS_METATYPE_LF) {
-				log_err("Inode %" PRIu64 " points to bad leaf "
-					PRIu64 ".\n", ip->i_di.di_num.no_addr,
-					leaf_no);
+			/* Make sure it's really a valid leaf block. */
+			if (check_meta(lbh, GFS_METATYPE_LF)) {
+				warn_and_patch(ip, &leaf_no, &bad_leaf,
+					       old_leaf, index,
+					       "that is not really a leaf");
+				memcpy(&leaf, &oldleaf, sizeof(oldleaf));
 				relse_buf(sbp, lbh);
 				break;
 			}
+			gfs_leaf_in(&leaf, BH_DATA(lbh));
+			if(pass->check_leaf) {
+				error = pass->check_leaf(ip, leaf_no, lbh,
+							 pass->private);
+			}
+
 			exp_count = (1 << (ip->i_di.di_depth - leaf.lf_depth));
 			log_debug("expected count %u - %u %u\n", exp_count,
 				  ip->i_di.di_depth, leaf.lf_depth);
@@ -210,8 +265,7 @@
 
 				/* Since the buffer possibly got
 				   updated directly, release it now,
-				   and grab it again later if we need
-				   it */
+				   and grab it again later if we need it */
 				relse_buf(sbp, lbh);
 				if(error < 0) {
 					stack;
@@ -261,6 +315,7 @@
 			}
 		} while(1);
 		old_leaf = leaf_no;
+		memcpy(&oldleaf, &leaf, sizeof(oldleaf));
 	}
 	return 0;
 }
--- cluster/gfs/gfs_fsck/metawalk.h	2005/02/11 22:01:04	1.3
+++ cluster/gfs/gfs_fsck/metawalk.h	2007/05/04 14:07:03	1.3.4.1
@@ -1,7 +1,7 @@
 /*****************************************************************************
 *******************************************************************************
 **
-**  Copyright (C) 2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2005-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -45,7 +45,7 @@
 struct metawalk_fxns {
 	void *private;
 	int (*check_leaf) (struct fsck_inode *ip, uint64_t block,
-			   osi_buf_t **bh, void *private);
+			   osi_buf_t *bh, void *private);
 	int (*check_metalist) (struct fsck_inode *ip, uint64_t block,
 			       osi_buf_t **bh, void *private);
 	int (*check_data) (struct fsck_inode *ip, uint64_t block,
--- cluster/gfs/gfs_fsck/pass1.c	2006/11/17 17:00:09	1.14.2.1
+++ cluster/gfs/gfs_fsck/pass1.c	2007/05/04 14:07:03	1.14.2.2
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -41,50 +41,14 @@
 	uint64_t ea_count;
 };
 
-static int leaf(struct fsck_inode *ip, uint64_t block, osi_buf_t **bh,
+static int leaf(struct fsck_inode *ip, uint64_t block, osi_buf_t *bh,
 		void *private)
 {
 	struct fsck_sb *sdp = ip->i_sbd;
 	struct block_count *bc = (struct block_count *) private;
-	if(check_range(sdp, block)){
-		log_warn("Leaf block #%"PRIu64" is out of range for "
-			 "directory #%"PRIu64".\n",
-			 block, ip->i_di.di_num.no_addr);
-		block_set(sdp->bl, ip->i_di.di_num.no_addr, bad_block);
-		return 1;
-	}
-	if(get_and_read_buf(sdp, block, bh, 0)){
-		log_err("Unable to read leaf block #%"PRIu64" for "
-			"directory #%"PRIu64".\n",
-			block, ip->i_di.di_num.no_addr);
-		if(query(sdp, "Clear directory inode at %"PRIu64"? (y/n) ",
-			 ip->i_di.di_num.no_addr)) {
-			block_set(sdp->bl, ip->i_di.di_num.no_addr, meta_inval);
-		} else {
-			log_err("Unreadable block %"PRIu64" ignored\n");
-		}
-		return 1;
-	}
-
-	if(check_meta(*bh, GFS_METATYPE_LF)){
-		log_err("Bad meta header for leaf block #%"PRIu64
-			" in directory #%"PRIu64". - is %u, should be %u\n",
-			 BH_BLKNO(*bh), ip->i_di.di_num.no_addr,
-			((struct gfs_meta_header *)BH_DATA((*bh)))->mh_type,
-			GFS_METATYPE_LF);
-		if(query(sdp, "Clear directory inode at %"PRIu64"? (y/n) ",
-			 ip->i_di.di_num.no_addr)) {
-			block_set(sdp->bl, ip->i_di.di_num.no_addr,
-				  meta_inval);
-			log_err("Directory inode marked invalid\n");
-		} else {
-			log_err("Invalid block %"PRIu64" ignored\n");
-		}
-		return 1;
-	}
 
-	log_debug("\tLeaf block at %15"PRIu64"\n", BH_BLKNO(*bh));
-	block_set(sdp->bl, BH_BLKNO(*bh), leaf_blk);
+	log_debug("\tLeaf block at %15"PRIu64"\n", BH_BLKNO(bh));
+	block_set(sdp->bl, BH_BLKNO(bh), leaf_blk);
 	bc->indir_count++;
 
 	return 0;
@@ -473,7 +437,7 @@
 }
 
 int clear_leaf(struct fsck_inode *ip, uint64_t block,
-	       osi_buf_t **bh, void *private)
+	       osi_buf_t *bh, void *private)
 {
 
 	struct fsck_sb *sdp = ip->i_sbd;
@@ -529,8 +493,6 @@
 }
 
 
-
-
 int handle_di(struct fsck_sb *sdp, osi_buf_t *bh, uint64_t block, int mfree)
 {
 	struct block_query q = {0};
--- cluster/gfs/gfs_fsck/pass2.c	2007/02/20 18:55:34	1.12.2.2
+++ cluster/gfs/gfs_fsck/pass2.c	2007/05/04 14:07:03	1.12.2.3
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -36,63 +36,6 @@
 };
 
 
-static int check_leaf(struct fsck_inode *ip, uint64_t block, osi_buf_t **lbh,
-		      void *private)
-{
-	uint64_t chain_no;
-	struct fsck_sb *sbp = ip->i_sbd;
-	struct gfs_leaf leaf;
-	osi_buf_t *chain_head = NULL;
-	osi_buf_t *bh = NULL;
-	int chain=0;
-	int error;
-
-	chain_no = block;
-
-	do {
-		/* FIXME: check the range of the leaf? */
-		/* check the leaf and stuff */
-
-		error = get_and_read_buf(sbp, chain_no, &bh, 0);
-
-		if(error){
-			stack;
-			goto fail;
-		}
-
-		gfs_leaf_in(&leaf, BH_DATA(bh));
-
-		/* Check the leaf headers */
-
-		if(!chain){
-			chain = 1;
-			chain_head = bh;
-			chain_no = leaf.lf_next;
-		}
-		else {
-			relse_buf(sbp, bh);
-			bh = NULL;
-			break;
-		}
-	} while(chain_no);
-
-	*lbh = chain_head;
-	return 0;
-
- fail:
-	/* FIXME: check this error path */
-	if(chain_head){
-		if(chain_head == bh){ bh = NULL; }
-		relse_buf(sbp, chain_head);
-	}
-	if(bh)
-		relse_buf(sbp, bh);
-
-	return -1;
-
-}
-
-
 /* Set children's parent inode in dir_info structure - ext2 does not set
  * dotdot inode here, but instead in pass3 - should we? */
 int set_parent_dir(struct fsck_sb *sbp, uint64_t childblock,
@@ -328,7 +271,8 @@
 	   q.block_type != inode_lnk && q.block_type != inode_blk &&
 	   q.block_type != inode_chr && q.block_type != inode_fifo &&
 	   q.block_type != inode_sock) {
-		log_err("Found directory entry '%s' in %"PRIu64" to something"
+		log_err("Found directory entry '%s' in block %"
+			PRIu64" to something"
 			" not a file or directory!\n", tmp_name,
 			ip->i_num.no_addr);
 		log_debug("block #%"PRIu64" in %"PRIu64"\n",
@@ -558,7 +502,7 @@
 
 struct metawalk_fxns pass2_fxns = {
 	.private = NULL,
-	.check_leaf = check_leaf,
+	.check_leaf = NULL,
 	.check_metalist = NULL,
 	.check_data = NULL,
 	.check_eattr_indir = check_eattr_indir,




More information about the Cluster-devel mailing list