[Cluster-devel] fsck: Clean up pass1 inode iteration code

Andrew Price anprice at redhat.com
Fri Feb 22 13:34:17 UTC 2013


Hi Steve,

On 22/02/13 12:08, Steven Whitehouse wrote:
>
> The original version of this patch, including readahead for the
> inodes only appears to deliver a performance improvement in some
> particular cases, and slows down other cases. So this patch is
> fairly similar in that it includes the clean ups from the previous
> attempt, however the readahead has been left out for the time
> being until the problem is better understood, and can be fixed.
>
> This clean up still has the advantage of better code structure
> around the main loop, and in addition it also reduced the number
> of read requests which are generated too.

Looks good to me.

> Signed-off-by: Steven Whitehouse <swhiteho at redhat.com>
>
> diff --git a/gfs2/fsck/pass1.c b/gfs2/fsck/pass1.c
> index 540f2a9..9a34e97 100644
> --- a/gfs2/fsck/pass1.c
> +++ b/gfs2/fsck/pass1.c
> @@ -14,6 +14,7 @@
>   #include <unistd.h>
>   #include <string.h>
>   #include <time.h>
> +#include <fcntl.h>
>   #include <sys/ioctl.h>
>   #include <inttypes.h>
>   #include <libintl.h>
> @@ -1547,6 +1548,128 @@ static int check_system_inodes(struct gfs2_sbd *sdp)
>   	return 0;
>   }
>
> +static int pass1_process_bitmap(struct gfs2_sbd *sdp, struct rgrp_tree *rgd, uint64_t *ibuf, unsigned n)
> +{
> +	struct gfs2_buffer_head *bh;
> +	unsigned i;
> +	uint64_t block;
> +
> +	for (i = 0; i < n; i++) {
> +		block = ibuf[i];
> +
> +		/* skip gfs1 rindex indirect blocks */
> +		if (sdp->gfs1 && blockfind(&gfs1_rindex_blks, block)) {
> +			log_debug(_("Skipping rindex indir block "
> +				    "%lld (0x%llx)\n"),
> +				  (unsigned long long)block,
> +				  (unsigned long long)block);
> +			continue;
> +		}
> +		warm_fuzzy_stuff(block);
> +
> +		if (fsck_abort) { /* if asked to abort */
> +			gfs2_special_free(&gfs1_rindex_blks);
> +			return FSCK_OK;
> +		}
> +		if (skip_this_pass) {
> +			printf( _("Skipping pass 1 is not a good idea.\n"));
> +			skip_this_pass = FALSE;
> +			fflush(stdout);
> +		}
> +		if (fsck_system_inode(sdp, block)) {
> +			log_debug(_("Already processed system inode "
> +				    "%lld (0x%llx)\n"),
> +				  (unsigned long long)block,
> +				  (unsigned long long)block);
> +			continue;
> +		}
> +		bh = bread(sdp, block);
> +
> +		if (gfs2_check_meta(bh, GFS2_METATYPE_DI)) {
> +			/* In gfs2, a bitmap mark of 2 means an inode,
> +			   but in gfs1 it means any metadata.  So if
> +			   this is gfs1 and not an inode, it may be
> +			   okay.  If it's non-dinode metadata, it will
> +			   be referenced by an inode, so we need to
> +			   skip it here and it will be sorted out
> +			   when the referencing inode is checked. */
> +			if (sdp->gfs1) {
> +				uint32_t check_magic;
> +
> +				check_magic = ((struct gfs2_meta_header *)
> +					       (bh->b_data))->mh_magic;
> +				if (be32_to_cpu(check_magic) == GFS2_MAGIC) {
> +					log_debug( _("Deferring GFS1 "
> +						     "metadata block #"
> +						     "%" PRIu64" (0x%"
> +						     PRIx64 ")\n"),
> +						   block, block);
> +					brelse(bh);
> +					continue;
> +				}
> +			}
> +			log_err( _("Found invalid inode at block #"
> +				   "%llu (0x%llx)\n"),
> +				 (unsigned long long)block,
> +				 (unsigned long long)block);
> +			if (gfs2_blockmap_set(bl, block, gfs2_block_free)) {
> +				stack;
> +				brelse(bh);
> +				gfs2_special_free(&gfs1_rindex_blks);
> +				return FSCK_ERROR;
> +			}
> +			check_n_fix_bitmap(sdp, block, gfs2_block_free);
> +		} else if (handle_di(sdp, bh) < 0) {
> +			stack;
> +			brelse(bh);
> +			gfs2_special_free(&gfs1_rindex_blks);
> +			return FSCK_ERROR;
> +		}
> +		/* Ignore everything else - they should be hit by the
> +		   handle_di step.  Don't check NONE either, because
> +		   check_meta passes everything if GFS2_METATYPE_NONE
> +		   is specified.  Hopefully, other metadata types such
> +		   as indirect blocks will be handled when the inode
> +		   itself is processed, and if it's not, it should be
> +		   caught in pass5. */
> +		brelse(bh);
> +	}
> +
> +	return 0;
> +}
> +
> +static int pass1_process_rgrp(struct gfs2_sbd *sdp, struct rgrp_tree *rgd)
> +{
> +	unsigned k, n, i;
> +	uint64_t *ibuf = malloc(sdp->bsize * GFS2_NBBY * sizeof(uint64_t));

Coverity will complain about ibuf not getting checked here.

Andy

> +	int ret;
> +
> +	for (k = 0; k < rgd->ri.ri_length; k++) {
> +		n = lgfs2_bm_scan(rgd, k, ibuf, GFS2_BLKST_DINODE);
> +
> +		if (n) {
> +			ret = pass1_process_bitmap(sdp, rgd, ibuf, n);
> +			if (ret)
> +				return ret;
> +		}
> +
> +		/*
> +		  For GFS1, we have to count the "free meta" blocks in the
> +		  resource group and mark them specially so we can count them
> +		  properly in pass5.
> +		 */
> +		if (!sdp->gfs1)
> +			continue;
> +
> +		n = lgfs2_bm_scan(rgd, k, ibuf, GFS2_BLKST_UNLINKED);
> +		for (i = 0; i < n; i++)
> +			gfs2_blockmap_set(bl, ibuf[i], gfs2_freemeta);
> +	}
> +
> +	free(ibuf);
> +	return 0;
> +}
> +
>   /**
>    * pass1 - walk through inodes and check inode state
>    *
> @@ -1563,12 +1686,10 @@ static int check_system_inodes(struct gfs2_sbd *sdp)
>   int pass1(struct gfs2_sbd *sdp)
>   {
>   	struct osi_node *n, *next = NULL;
> -	struct gfs2_buffer_head *bh;
> -	uint64_t block = 0;
>   	struct rgrp_tree *rgd;
> -	int first;
>   	uint64_t i;
>   	uint64_t rg_count = 0;
> +	int ret;
>
>   	osi_list_init(&gfs1_rindex_blks.list);
>
> @@ -1611,115 +1732,10 @@ int pass1(struct gfs2_sbd *sdp)
>   			gfs2_meta_rgrp);*/
>   		}
>
> -		first = 1;
> +		ret = pass1_process_rgrp(sdp, rgd);
> +		if (ret)
> +			return ret;
>
> -		while (1) {
> -			/* "block" is relative to the entire file system */
> -			/* Get the next dinode in the file system, according
> -			   to the bitmap.  This should ONLY be dinodes unless
> -			   it's GFS1, in which case it can be any metadata. */
> -			if (gfs2_next_rg_meta(rgd, &block, first))
> -				break;
> -			/* skip gfs1 rindex indirect blocks */
> -			if (sdp->gfs1 && blockfind(&gfs1_rindex_blks, block)) {
> -				log_debug(_("Skipping rindex indir block "
> -					    "%lld (0x%llx)\n"),
> -					  (unsigned long long)block,
> -					  (unsigned long long)block);
> -				first = 0;
> -				continue;
> -			}
> -			warm_fuzzy_stuff(block);
> -
> -			if (fsck_abort) { /* if asked to abort */
> -				gfs2_special_free(&gfs1_rindex_blks);
> -				return FSCK_OK;
> -			}
> -			if (skip_this_pass) {
> -				printf( _("Skipping pass 1 is not a good idea.\n"));
> -				skip_this_pass = FALSE;
> -				fflush(stdout);
> -			}
> -			if (fsck_system_inode(sdp, block)) {
> -				log_debug(_("Already processed system inode "
> -					    "%lld (0x%llx)\n"),
> -					  (unsigned long long)block,
> -					  (unsigned long long)block);
> -				first = 0;
> -				continue;
> -			}
> -			bh = bread(sdp, block);
> -
> -			/*log_debug( _("Checking metadata block #%" PRIu64
> -			  " (0x%" PRIx64 ")\n"), block, block);*/
> -
> -			if (gfs2_check_meta(bh, GFS2_METATYPE_DI)) {
> -				/* In gfs2, a bitmap mark of 2 means an inode,
> -				   but in gfs1 it means any metadata.  So if
> -				   this is gfs1 and not an inode, it may be
> -				   okay.  If it's non-dinode metadata, it will
> -				   be referenced by an inode, so we need to
> -				   skip it here and it will be sorted out
> -				   when the referencing inode is checked. */
> -				if (sdp->gfs1) {
> -					uint32_t check_magic;
> -
> -					check_magic = ((struct
> -							gfs2_meta_header *)
> -						       (bh->b_data))->mh_magic;
> -					if (be32_to_cpu(check_magic) ==
> -					    GFS2_MAGIC) {
> -						log_debug( _("Deferring GFS1 "
> -							     "metadata block #"
> -							     "%" PRIu64" (0x%"
> -							     PRIx64 ")\n"),
> -							   block, block);
> -						brelse(bh);
> -						first = 0;
> -						continue;
> -					}
> -				}
> -				log_err( _("Found invalid inode at block #"
> -					   "%llu (0x%llx)\n"),
> -					 (unsigned long long)block,
> -					 (unsigned long long)block);
> -				if (gfs2_blockmap_set(bl, block,
> -						      gfs2_block_free)) {
> -					stack;
> -					brelse(bh);
> -					gfs2_special_free(&gfs1_rindex_blks);
> -					return FSCK_ERROR;
> -				}
> -				check_n_fix_bitmap(sdp, block,
> -						   gfs2_block_free);
> -			} else if (handle_di(sdp, bh) < 0) {
> -				stack;
> -				brelse(bh);
> -				gfs2_special_free(&gfs1_rindex_blks);
> -				return FSCK_ERROR;
> -			}
> -			/* Ignore everything else - they should be hit by the
> -			   handle_di step.  Don't check NONE either, because
> -			   check_meta passes everything if GFS2_METATYPE_NONE
> -			   is specified.  Hopefully, other metadata types such
> -			   as indirect blocks will be handled when the inode
> -			   itself is processed, and if it's not, it should be
> -			   caught in pass5. */
> -			brelse(bh);
> -			first = 0;
> -		}
> -		/*
> -		  For GFS1, we have to count the "free meta" blocks in the
> -		  resource group and mark them specially so we can count them
> -		  properly in pass5.
> -		 */
> -		if (!sdp->gfs1)
> -			continue;
> -		first = 1;
> -		while (gfs2_next_rg_freemeta(rgd, &block, first) == 0) {
> -			gfs2_blockmap_set(bl, block, gfs2_freemeta);
> -			first = 0;
> -		}
>   	}
>   	gfs2_special_free(&gfs1_rindex_blks);
>   	return FSCK_OK;
> diff --git a/gfs2/libgfs2/libgfs2.h b/gfs2/libgfs2/libgfs2.h
> index 46d4d67..db31a6c 100644
> --- a/gfs2/libgfs2/libgfs2.h
> +++ b/gfs2/libgfs2/libgfs2.h
> @@ -757,12 +757,12 @@ extern int build_root(struct gfs2_sbd *sdp);
>   extern int do_init_inum(struct gfs2_sbd *sdp);
>   extern int do_init_statfs(struct gfs2_sbd *sdp);
>   extern int gfs2_check_meta(struct gfs2_buffer_head *bh, int type);
> +extern unsigned lgfs2_bm_scan(struct rgrp_tree *rgd, unsigned idx,
> +			      uint64_t *buf, uint8_t state);
>   extern int gfs2_next_rg_meta(struct rgrp_tree *rgd, uint64_t *block,
>   			     int first);
>   extern int gfs2_next_rg_metatype(struct gfs2_sbd *sdp, struct rgrp_tree *rgd,
>   				 uint64_t *block, uint32_t type, int first);
> -extern int gfs2_next_rg_freemeta(struct rgrp_tree *rgd, uint64_t *block,
> -				 int first);
>
>   /* super.c */
>   extern int check_sb(struct gfs2_sb *sb);
> diff --git a/gfs2/libgfs2/structures.c b/gfs2/libgfs2/structures.c
> index 645c45a..f9231ca 100644
> --- a/gfs2/libgfs2/structures.c
> +++ b/gfs2/libgfs2/structures.c
> @@ -495,6 +495,24 @@ int gfs2_check_meta(struct gfs2_buffer_head *bh, int type)
>   	return 0;
>   }
>
> +unsigned lgfs2_bm_scan(struct rgrp_tree *rgd, unsigned idx, uint64_t *buf, uint8_t state)
> +{
> +	struct gfs2_bitmap *bi = &rgd->bits[idx];
> +	unsigned n = 0;
> +	uint32_t blk = 0;
> +
> +	while(blk < (bi->bi_len * GFS2_NBBY)) {
> +		blk = gfs2_bitfit((const unsigned char *)rgd->bh[idx]->b_data + bi->bi_offset,
> +				  bi->bi_len, blk, state);
> +		if (blk == BFITNOENT)
> +			break;
> +		buf[n++] = blk + (bi->bi_start * GFS2_NBBY) + rgd->ri.ri_data0;
> +		blk++;
> +	}
> +
> +	return n;
> +}
> +
>   /**
>    * gfs2_next_rg_meta
>    * @rgd:
> @@ -545,11 +563,6 @@ int gfs2_next_rg_meta(struct rgrp_tree *rgd, uint64_t *block, int first)
>   	return __gfs2_next_rg_meta(rgd, block, first, GFS2_BLKST_DINODE);
>   }
>
> -int gfs2_next_rg_freemeta(struct rgrp_tree *rgd, uint64_t *block, int first)
> -{
> -	return __gfs2_next_rg_meta(rgd, block, first, GFS2_BLKST_UNLINKED);
> -}
> -
>   /**
>    * next_rg_metatype
>    * @rgd:
>
>




More information about the Cluster-devel mailing list