[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

Re: [Cluster-devel] [PATCH] mkfs.gfs2: Move the new rgrp creation code into libgfs2



Hi,

On Tue, 2013-06-11 at 18:18 +0100, Andrew Price wrote:
> Adapt and move the new resource group creation and writing code into
> libgfs2. The new APIs have been created to no longer expose the
> underlying container (i.e. osi_tree.h bits) to users, and to reduce
> dependency on the gfs2_sbd structure which has traditionally been the
> dumping ground for long-lived state.
> 
> The intention is to convert the rest of the mkfs code to use these APIs,
> then gradually migrate the other tools to them, and eventually hide the
> rgrp_tree structure and remove the old functions and structures which the
> new ones make obsolete.
> 
One thought is that we should separate the functions into those which do
calculations of where to put things (useful also in fsck) and those
which actually do the writing of the rgrps (most likely only useful in
mkfs and grow). Otherwise looks good to me,

Steve.

> Signed-off-by: Andrew Price <anprice redhat com>
> ---
>  gfs2/libgfs2/libgfs2.h |  17 +++++
>  gfs2/libgfs2/rgrp.c    | 179 +++++++++++++++++++++++++++++++++++++++++++++++++
>  gfs2/mkfs/main_mkfs.c  | 170 +++++++++++-----------------------------------
>  3 files changed, 236 insertions(+), 130 deletions(-)
> 
> diff --git a/gfs2/libgfs2/libgfs2.h b/gfs2/libgfs2/libgfs2.h
> index 8502abf..fe7129a 100644
> --- a/gfs2/libgfs2/libgfs2.h
> +++ b/gfs2/libgfs2/libgfs2.h
> @@ -189,6 +189,23 @@ struct rgrp_tree {
>  	struct gfs2_buffer_head **bh;
>  };
>  
> +struct lgfs2_rgrp_align {
> +	uint64_t base;
> +	uint64_t offset;
> +};
> +
> +typedef struct rgrp_tree *lgfs2_rgrp_t;
> +typedef struct _lgfs2_rgrps *lgfs2_rgrps_t;
> +
> +extern lgfs2_rgrps_t lgfs2_rgrps_init(unsigned bsize, uint64_t start, uint64_t devlen, uint32_t rglen, struct lgfs2_rgrp_align *al);
> +extern unsigned lgfs2_rgsize_for_data(uint64_t blksreq, unsigned bsize);
> +extern lgfs2_rgrp_t lgfs2_rgrp_append(lgfs2_rgrps_t rgs, uint32_t rglen, int expand);
> +extern int lgfs2_rgrp_write(int fd, lgfs2_rgrp_t rg, unsigned bsize);
> +extern int lgfs2_rgrps_end(lgfs2_rgrps_t rgs);
> +extern struct gfs2_rindex *lgfs2_rgrp_index(lgfs2_rgrp_t rg);
> +// Temporary function to aid API migration
> +extern struct osi_node *lgfs2_rgrps_root(lgfs2_rgrps_t rgs) __attribute__((deprecated));
> +
>  struct gfs2_buffer_head {
>  	osi_list_t b_altlist; /* alternate list */
>  	uint64_t b_blocknr;
> diff --git a/gfs2/libgfs2/rgrp.c b/gfs2/libgfs2/rgrp.c
> index f2b8304..8fe7b6d 100644
> --- a/gfs2/libgfs2/rgrp.c
> +++ b/gfs2/libgfs2/rgrp.c
> @@ -224,3 +224,182 @@ void gfs2_rgrp_free(struct osi_root *rgrp_tree)
>  		free(rgd);
>  	}
>  }
> +
> +/**
> + * This structure is declared in libgfs2.h as an opaque type. It stores the
> + * constants and context required for creating resource groups from any point
> + * in an application.
> + */
> +struct _lgfs2_rgrps {
> +	struct osi_root root;
> +	uint64_t nextaddr;
> +	unsigned bsize;
> +	unsigned long align;
> +	unsigned long align_off;
> +	unsigned long curr_offset;
> +	uint64_t maxrgsz;
> +	uint64_t minrgsz;
> +	uint64_t devlen;
> +	uint64_t count;
> +	uint64_t blks_total;
> +	uint32_t rgsize;
> +};
> +
> +static uint64_t align_block(const uint64_t base, const uint64_t align)
> +{
> +	if ((align > 0) && ((base % align) > 0))
> +		return (base - (base % align)) + align;
> +	return base;
> +}
> +
> +/**
> + * Create and initialise an empty set of resource groups
> + * bsize: The block size of the fs
> + * start: The block address of the first resource group
> + * devlen: The length of the device, in fs blocks
> + * rglen: Default rg size, in blocks
> + * al: The required alignment of the resource groups
> + * Returns an initialised lgfs2_rgrps_t or NULL if unsuccessful with errno set
> + */
> +lgfs2_rgrps_t lgfs2_rgrps_init(unsigned bsize, uint64_t start, uint64_t devlen, uint32_t rglen, struct lgfs2_rgrp_align *al)
> +{
> +	lgfs2_rgrps_t rgs = calloc(1, sizeof(*rgs));
> +	if (rgs == NULL)
> +		return NULL;
> +
> +	rgs->bsize = bsize;
> +	rgs->maxrgsz = (GFS2_MAX_RGSIZE << 20) / bsize;
> +	rgs->minrgsz = (GFS2_MIN_RGSIZE << 20) / bsize;
> +	rgs->rgsize = rglen;
> +	rgs->devlen = devlen;
> +	rgs->align = al->base;
> +	rgs->align_off = al->offset;
> +	memset(&rgs->root, 0, sizeof(rgs->root));
> +	rgs->nextaddr = align_block(start, rgs->align);
> +
> +	return rgs;
> +}
> +
> +/**
> + * Return the rindex structure relating to a resource group.
> + */
> +struct gfs2_rindex *lgfs2_rgrp_index(lgfs2_rgrp_t rg)
> +{
> +	return &rg->ri;
> +}
> +
> +/**
> + * Return non-zero if there is no space left for more resource groups, or zero if there is
> + */
> +int lgfs2_rgrps_end(lgfs2_rgrps_t rgs)
> +{
> +	return (rgs->nextaddr == 0);
> +}
> +
> +/**
> + * Returns the total resource group size, in blocks, required to give blksreq data blocks
> + */
> +unsigned lgfs2_rgsize_for_data(uint64_t blksreq, unsigned bsize)
> +{
> +	const uint32_t blks_rgrp = GFS2_NBBY * (bsize - sizeof(struct gfs2_rgrp));
> +	const uint32_t blks_meta = GFS2_NBBY * (bsize - sizeof(struct gfs2_meta_header));
> +	unsigned bitblocks = 1;
> +	if (blksreq > blks_rgrp)
> +		bitblocks += ((blksreq - blks_rgrp) + blks_meta - 1) / blks_meta;
> +	return bitblocks + blksreq;
> +}
> +
> +// Temporary function to aid in API migration
> +struct osi_node *lgfs2_rgrps_root(lgfs2_rgrps_t rgs)
> +{
> +	return rgs->root.osi_node;
> +}
> +
> +/**
> + * Create a new resource group after the last resource group in a set.
> + * rgs: The set of resource groups
> + * rglen: The required length of the resource group. If it is 0, the default rgsize
> + *        passed to lgfs2_rgrps_init() is used.
> + * expand: Whether to expand the resource group when alignment would leave a gap.
> + * Returns the new resource group on success or NULL on failure.
> + */
> +lgfs2_rgrp_t lgfs2_rgrp_append(lgfs2_rgrps_t rgs, uint32_t rglen, int expand)
> +{
> +	int err = 0;
> +	lgfs2_rgrp_t rg = rgrp_insert(&rgs->root, rgs->nextaddr);
> +	if (rg == NULL)
> +		return NULL;
> +
> +	rgs->curr_offset += rgs->align_off;
> +	if (rgs->curr_offset >= rgs->align)
> +		rgs->curr_offset = 0;
> +
> +	if (rgs->rgsize > rglen)
> +		rglen = rgs->rgsize;
> +
> +	rgs->nextaddr = align_block(rg->ri.ri_addr + rgs->rgsize, rgs->align) + rgs->curr_offset;
> +	/* Use up gap left by alignment if possible */
> +	if (expand && ((rgs->nextaddr - rg->ri.ri_addr) <= rgs->maxrgsz))
> +		rglen = rgs->nextaddr - rg->ri.ri_addr;
> +
> +	if ((rgs->nextaddr + rgs->rgsize) > rgs->devlen) {
> +		/* Squeeze the last 1 or 2 rgs into the remaining space */
> +		if ((rgs->nextaddr < rgs->devlen) && ((rgs->devlen - rgs->nextaddr) >= rgs->minrgsz)) {
> +			rgs->rgsize = rgs->devlen - rgs->nextaddr;
> +		} else {
> +			if (rgs->devlen - rg->ri.ri_addr <= rgs->maxrgsz)
> +				rglen = rgs->devlen - rg->ri.ri_addr;
> +			else
> +				rglen = rgs->maxrgsz;
> +			/* This is the last rg */
> +			rgs->nextaddr = 0;
> +		}
> +	}
> +
> +	rg->ri.ri_length = rgblocks2bitblocks(rgs->bsize, rglen, &rg->ri.ri_data);
> +	rg->ri.ri_data0 = rg->ri.ri_addr + rg->ri.ri_length;
> +	rg->ri.ri_bitbytes = rg->ri.ri_data / GFS2_NBBY;
> +	rg->rg.rg_header.mh_magic = GFS2_MAGIC;
> +	rg->rg.rg_header.mh_type = GFS2_METATYPE_RG;
> +	rg->rg.rg_header.mh_format = GFS2_FORMAT_RG;
> +	rg->rg.rg_free = rg->ri.ri_data;
> +
> +	err = gfs2_compute_bitstructs(rgs->bsize, rg);
> +	if (err != 0)
> +		return NULL;
> +	rgs->blks_total += rg->ri.ri_data;
> +	rgs->count++;
> +	return rg;
> +}
> +
> +/**
> + * Write a resource group to a file descriptor.
> + * Returns 0 on success or non-zero on failure with errno set
> + */
> +int lgfs2_rgrp_write(int fd, lgfs2_rgrp_t rg, unsigned bsize)
> +{
> +	ssize_t ret = 0;
> +	size_t len = rg->ri.ri_length * bsize;
> +	unsigned int i;
> +	const struct gfs2_meta_header bmh = {
> +		.mh_magic = GFS2_MAGIC,
> +		.mh_type = GFS2_METATYPE_RB,
> +		.mh_format = GFS2_FORMAT_RB,
> +	};
> +	char *buff = calloc(len, 1);
> +	if (buff == NULL)
> +		return -1;
> +
> +	gfs2_rgrp_out(&rg->rg, buff);
> +	for (i = 1; i < rg->ri.ri_length; i++)
> +		gfs2_meta_header_out(&bmh, buff + (i * bsize));
> +
> +	ret = pwrite(fd, buff, len, rg->ri.ri_addr * bsize);
> +	if (ret != len) {
> +		free(buff);
> +		return -1;
> +	}
> +
> +	free(buff);
> +	return 0;
> +}
> diff --git a/gfs2/mkfs/main_mkfs.c b/gfs2/mkfs/main_mkfs.c
> index a5078b6..0d84064 100644
> --- a/gfs2/mkfs/main_mkfs.c
> +++ b/gfs2/mkfs/main_mkfs.c
> @@ -598,46 +598,12 @@ static void warn_of_destruction(const char *path)
>  	free(abspath);
>  }
>  
> -static int writerg(int fd, const struct rgrp_tree *rgt, const unsigned bsize)
> +static lgfs2_rgrps_t rgs_init(struct mkfs_opts *opts, struct mkfs_dev *dev, struct gfs2_sbd *sdp)
>  {
> -	ssize_t ret = 0;
> -	unsigned int i;
> -	const struct gfs2_meta_header bmh = {
> -		.mh_magic = GFS2_MAGIC,
> -		.mh_type = GFS2_METATYPE_RB,
> -		.mh_format = GFS2_FORMAT_RB,
> -	};
> -	struct iovec iov = {
> -		.iov_len = rgt->ri.ri_length * bsize,
> -		.iov_base = calloc(rgt->ri.ri_length, bsize),
> -	};
> -	if (iov.iov_base == NULL)
> -		return -1;
> -
> -	gfs2_rgrp_out(&rgt->rg, iov.iov_base);
> -	for (i = 1; i < rgt->ri.ri_length; i++)
> -		gfs2_meta_header_out(&bmh, (char *)iov.iov_base + (i * bsize));
> -
> -	ret = pwritev(fd, &iov, 1, rgt->ri.ri_addr * bsize);
> -	if (ret != iov.iov_len) {
> -		free(iov.iov_base);
> -		return -1;
> -	}
> -
> -	free(iov.iov_base);
> -	return 0;
> -}
> +	uint64_t rgsize = (opts->rgsize << 20) / sdp->bsize;
> +	struct lgfs2_rgrp_align align = {.base = 0, .offset = 0};
> +	lgfs2_rgrps_t rgs;
>  
> -static uint64_t align_block(const uint64_t base, const uint64_t align)
> -{
> -	if ((align > 0) && ((base % align) > 0))
> -		return (base - (base % align)) + align;
> -	return base;
> -}
> -
> -static void rgs_init(struct mkfs_rgs *rgs, struct mkfs_opts *opts, struct mkfs_dev *dev, struct gfs2_sbd *sdp)
> -{
> -	memset(rgs, 0, sizeof(*rgs));
>  	if (opts->align && opts->got_sunit) {
>  		if ((opts->sunit % sdp->bsize) != 0) {
>  			fprintf(stderr, _("Stripe unit (%lu) must be a multiple of block size (%u)\n"),
> @@ -648,112 +614,64 @@ static void rgs_init(struct mkfs_rgs *rgs, struct mkfs_opts *opts, struct mkfs_d
>  			        opts->swidth, opts->sunit);
>  			exit(1);
>  		} else {
> -			rgs->align = opts->swidth / sdp->bsize;
> -			rgs->align_off = opts->sunit / sdp->bsize;
> +			align.base = opts->swidth / sdp->bsize;
> +			align.offset = opts->sunit / sdp->bsize;
>  		}
>  	} else if (opts->align) {
>  		if ((dev->minimum_io_size > dev->physical_sector_size) &&
>  		    (dev->optimal_io_size > dev->physical_sector_size)) {
> -			rgs->align = dev->optimal_io_size / sdp->bsize;
> -			rgs->align_off = dev->minimum_io_size / sdp->bsize;
> +			align.base = dev->optimal_io_size / sdp->bsize;
> +			align.offset = dev->minimum_io_size / sdp->bsize;
>  		}
>  	}
> -	rgs->bsize = sdp->bsize;
> -	rgs->maxrgsz = (GFS2_MAX_RGSIZE << 20) / sdp->bsize;
> -	rgs->minrgsz = (GFS2_MIN_RGSIZE << 20) / sdp->bsize;
> -	rgs->nextaddr = align_block(sdp->sb_addr + 1, rgs->align);
> -	rgs->rgsize = (sdp->rgsize << 20) / sdp->bsize;
> -	rgs->devlen = sdp->device.length;
> -	rgs->root = &sdp->rgtree;
> -}
> -
> -static unsigned rgsize_for_data(uint64_t blksreq, unsigned bsize)
> -{
> -        const uint32_t blks_rgrp = GFS2_NBBY * (bsize - sizeof(struct gfs2_rgrp));
> -        const uint32_t blks_meta = GFS2_NBBY * (bsize - sizeof(struct gfs2_meta_header));
> -	unsigned bitblocks = 1;
> -	if (blksreq > blks_rgrp)
> -		bitblocks += ((blksreq - blks_rgrp) + blks_meta - 1) / blks_meta;
> -	return bitblocks + blksreq;
> -}
>  
> -static struct rgrp_tree *rg_append(struct mkfs_rgs *rgs, const struct mkfs_opts *opts, uint64_t freerq)
> -{
> -	int err = 0;
> -	uint64_t length = rgsize_for_data(freerq, rgs->bsize);
> -	struct rgrp_tree *rgt = rgrp_insert(rgs->root, rgs->nextaddr);
> -	if (rgt == NULL)
> -		return NULL;
> -
> -	rgs->curr_offset += rgs->align_off;
> -	if (rgs->curr_offset >= rgs->align)
> -		rgs->curr_offset = 0;
> -
> -	if (rgs->rgsize > length)
> -		length = rgs->rgsize;
> -
> -	rgs->nextaddr = align_block(rgt->ri.ri_addr + rgs->rgsize, rgs->align) + rgs->curr_offset;
> -	/* Use up gap left by alignment if possible */
> -	if (!opts->got_rgsize && ((rgs->nextaddr - rgt->ri.ri_addr) <= rgs->maxrgsz))
> -		length = rgs->nextaddr - rgt->ri.ri_addr;
> -
> -	if ((rgs->nextaddr + rgs->rgsize) > rgs->devlen) {
> -		/* Squeeze the last 1 or 2 rgs into the remaining space */
> -		if ((rgs->nextaddr < rgs->devlen) && ((rgs->devlen - rgs->nextaddr) >= rgs->minrgsz)) {
> -			rgs->rgsize = rgs->devlen - rgs->nextaddr;
> -		} else {
> -			if (rgs->devlen - rgt->ri.ri_addr <= rgs->maxrgsz)
> -				length = rgs->devlen - rgt->ri.ri_addr;
> -			else
> -				length = rgs->maxrgsz;
> -			/* This is the last rg */
> -			rgs->nextaddr = 0;
> -		}
> +	rgs = lgfs2_rgrps_init(sdp->bsize, sdp->sb_addr + 1, sdp->device.length, rgsize, &align);
> +	if (rgs == NULL) {
> +		perror(_("Could not initialise resource groups"));
> +		exit(-1);
>  	}
>  
> -	rgt->ri.ri_length = rgblocks2bitblocks(rgs->bsize, length, &rgt->ri.ri_data);
> -	rgt->ri.ri_data0 = rgt->ri.ri_addr + rgt->ri.ri_length;
> -	rgt->ri.ri_bitbytes = rgt->ri.ri_data / GFS2_NBBY;
> -	rgt->rg.rg_header.mh_magic = GFS2_MAGIC;
> -	rgt->rg.rg_header.mh_type = GFS2_METATYPE_RG;
> -	rgt->rg.rg_header.mh_format = GFS2_FORMAT_RG;
> -	rgt->rg.rg_free = rgt->ri.ri_data;
> -
>  	if (opts->debug) {
> -		gfs2_rindex_print(&rgt->ri);
> -		printf(" offset: %"PRIu64"\n", rgs->curr_offset);
> +		printf("  rgrp align = ");
> +		if (opts->align)
> +			printf("%lu+%lu blocks\n", align.base, align.offset);
> +		else
> +			printf("(disabled)\n");
>  	}
>  
> -	err = gfs2_compute_bitstructs(rgs->bsize, rgt);
> -	if (err != 0) {
> -		fprintf(stderr, _("Could not compute bitmaps. "
> -			"Check resource group and block size options.\n"));
> -		return NULL;
> -	}
> -	rgs->blks_total += rgt->ri.ri_data;
> -	rgs->count++;
> -	return rgt;
> +	return rgs;
>  }
>  
> -static uint64_t place_rgrps(struct mkfs_rgs *rgs, int fd, const struct mkfs_opts *opts, const struct mkfs_dev *dev)
> +static uint64_t place_rgrps(struct gfs2_sbd *sdp, lgfs2_rgrps_t rgs, struct mkfs_opts *opts, const struct mkfs_dev *dev)
>  {
>  	int err = 0;
> -	struct rgrp_tree *rgt = NULL;
> +	lgfs2_rgrp_t rg = NULL;
> +	struct gfs2_rindex *ri = NULL;
>  
> -	while (rgs->nextaddr > 0) {
> -		rgt = rg_append(rgs, opts, 0);
> -		if (rgt == NULL) {
> +	while (!lgfs2_rgrps_end(rgs)) {
> +		rg = lgfs2_rgrp_append(rgs, 0, !opts->got_rgsize);
> +		if (rg == NULL) {
>  			perror(_("Failed to create resource group"));
>  			return 0;
>  		}
> -		err = writerg(fd, rgt, rgs->bsize);
> +		err = lgfs2_rgrp_write(sdp->device_fd, rg, sdp->bsize);
>  		if (err != 0) {
>  			perror(_("Failed to write resource group"));
>  			return 0;
>  		}
> +		ri = lgfs2_rgrp_index(rg);
> +		if (opts->debug) {
> +			gfs2_rindex_print(ri);
> +			printf("\n");
> +		}
> +		sdp->blks_total += ri->ri_data;
> +		sdp->rgrps++;
>  	}
>  
> -	return rgt->ri.ri_data0 + rgt->ri.ri_data;
> +	if (ri == NULL)
> +		return 0;
> +	else
> +		return ri->ri_data0 + ri->ri_data;
>  }
>  
>  static void sbd_init(struct gfs2_sbd *sdp, struct mkfs_opts *opts, struct mkfs_dev *dev, unsigned bsize)
> @@ -875,7 +793,7 @@ void main_mkfs(int argc, char *argv[])
>  	struct gfs2_sbd sbd;
>  	struct mkfs_opts opts;
>  	struct mkfs_dev dev;
> -	struct mkfs_rgs rgs;
> +	lgfs2_rgrps_t rgs;
>  	int error;
>  	unsigned char uuid[16];
>  	unsigned bsize;
> @@ -892,8 +810,6 @@ void main_mkfs(int argc, char *argv[])
>  	}
>  
>  	sbd_init(&sbd, &opts, &dev, bsize);
> -	rgs_init(&rgs, &opts, &dev, &sbd);
> -
>  	if (opts.debug) {
>  		printf(_("File system options:\n"));
>  		printf("  bsize = %u\n", sbd.bsize);
> @@ -906,13 +822,8 @@ void main_mkfs(int argc, char *argv[])
>  		printf("  fssize = %"PRIu64"\n", opts.fssize);
>  		printf("  sunit = %lu\n", opts.sunit);
>  		printf("  swidth = %lu\n", opts.swidth);
> -		printf("  rgrp align = ");
> -		if (opts.align)
> -			printf("%lu+%lu blocks\n", rgs.align, rgs.align_off);
> -		else
> -			printf("(disabled)\n");
>  	}
> -
> +	rgs = rgs_init(&opts, &dev, &sbd);
>  	warn_of_destruction(opts.device);
>  
>  	if (opts.confirm && !opts.override)
> @@ -921,13 +832,12 @@ void main_mkfs(int argc, char *argv[])
>  	if (!S_ISREG(dev.stat.st_mode) && opts.discard)
>  		discard_blocks(dev.fd, dev.size, opts.debug);
>  
> -	sbd.fssize = place_rgrps(&rgs, sbd.device_fd, &opts, &dev);
> +	sbd.fssize = place_rgrps(&sbd, rgs, &opts, &dev);
>  	if (sbd.fssize == 0) {
>  		fprintf(stderr, _("Failed to build resource groups\n"));
>  		exit(1);
>  	}
> -	sbd.blks_total = rgs.blks_total;
> -	sbd.rgrps = rgs.count;
> +	sbd.rgtree.osi_node = lgfs2_rgrps_root(rgs); // Temporary
>  	build_root(&sbd);
>  	build_master(&sbd);
>  	error = build_jindex(&sbd);



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]