[Cluster-devel] [PATCH 3/4] mkfs.gfs2: Create new resource groups on-demand

Andrew Price anprice at redhat.com
Thu Jun 6 12:03:32 UTC 2013


Adds a structure to encapsulate the state of resource group creation so
that the new rg_append() function can be called at any time. rg_append()
takes the number of data blocks required as a parameter so that future
enhancements can request a given number of free data blocks.

place_rgrps() now becomes a simple loop to call rg_append() and
writerg() until the end of the device.

Signed-off-by: Andrew Price <anprice at redhat.com>
---
 gfs2/mkfs/main_mkfs.c | 250 ++++++++++++++++++++++++++++----------------------
 1 file changed, 139 insertions(+), 111 deletions(-)

diff --git a/gfs2/mkfs/main_mkfs.c b/gfs2/mkfs/main_mkfs.c
index 058e4fa..dcfb032 100644
--- a/gfs2/mkfs/main_mkfs.c
+++ b/gfs2/mkfs/main_mkfs.c
@@ -134,6 +134,25 @@ struct mkfs_dev {
 	unsigned int got_topol:1;
 };
 
+/**
+ * State of resource group placement, carried between calls to rg_append(). Allows mkfs
+ * to create resource groups at any point instead of creating them all in one batch.
+ */
+struct mkfs_rgs {
+	struct osi_root *root; /* Tree the new rgrps are inserted into; rgs_init() points it at sdp->rgtree */
+	uint64_t nextaddr; /* Block address for the next rgrp; rg_append() sets it to 0 after the last one */
+	unsigned bsize; /* Block size, copied from sdp->bsize */
+	unsigned long align; /* Alignment in blocks (stripe width or optimal I/O size / bsize); 0 if none */
+	unsigned long align_off; /* Offset step in blocks (stripe unit or minimum I/O size / bsize); 0 if none */
+	unsigned long curr_offset; /* Added to each aligned address; grows by align_off, reset to 0 at align */
+	uint64_t maxrgsz; /* (GFS2_MAX_RGSIZE << 20) / bsize */
+	uint64_t minrgsz; /* (GFS2_MIN_RGSIZE << 20) / bsize */
+	uint64_t devlen; /* Copied from sdp->device.length; block addresses are checked against it */
+	uint64_t rgsize; /* Target rgrp size in blocks; rg_append() may shrink it near the end of the device */
+	uint64_t count; /* Number of rgrps appended successfully so far */
+	uint64_t blks_total; /* Running sum of ri_data over the appended rgrps */
+};
+
 static void opts_init(struct mkfs_opts *opts)
 {
 	memset(opts, 0, sizeof(*opts));
@@ -499,12 +518,6 @@ static void opts_check(struct mkfs_opts *opts)
 		fprintf(stderr, _("Stripe unit and stripe width must be specified together\n"));
 		exit(1);
 	}
-
-	if (opts->got_sunit && (opts->swidth % opts->sunit)) {
-		fprintf(stderr, _("Stripe width (%lu) must be a multiple of stripe unit (%lu)\n"),
-				opts->swidth, opts->sunit);
-		exit(1);
-	}
 }
 
 static void print_results(struct gfs2_sbd *sdp, uint64_t real_device_size,
@@ -587,96 +600,125 @@ static uint64_t align_block(const uint64_t base, const uint64_t align, const uin
 	return base;
 }
 
-static int place_rgrps(struct gfs2_sbd *sdp, const struct mkfs_opts *opts, const struct mkfs_dev *dev)
+static void rgs_init(struct mkfs_rgs *rgs, struct mkfs_opts *opts, struct mkfs_dev *dev, struct gfs2_sbd *sdp)
 {
-	struct rgrp_tree *rgt = NULL;
-	uint64_t rgaddr = 0;
-	uint64_t nextaddr = 0;
-	uint64_t rglen = (sdp->rgsize << 20) / sdp->bsize;
-	const uint64_t maxrgsz = (GFS2_MAX_RGSIZE << 20) / sdp->bsize;
-	const uint64_t minrgsz = (GFS2_MIN_RGSIZE << 20) / sdp->bsize;
-	unsigned sunit_blocks = opts->sunit / sdp->bsize;
-	unsigned swidth_blocks = opts->swidth / opts->bsize;
-	unsigned stripe_offset = 0;
-	int err = 0;
-
-	sdp->new_rgrps = 0;
-	rgaddr = align_block(sdp->sb_addr + 1, swidth_blocks, sdp->bsize);
-
-	while (rgaddr > 0) {
-		rgt = rgrp_insert(&sdp->rgtree, rgaddr);
-		if (rgt == NULL)
-			return -1;
-
-		stripe_offset += sunit_blocks;
-		if (stripe_offset >= swidth_blocks)
-			stripe_offset = 0;
-
-		/* The next rg might not fit into the remaining space so calculate it now
-		   in order to make decisions about the current rg */
-		nextaddr = align_block(rgaddr + rglen, swidth_blocks, sdp->bsize) + stripe_offset;
-		if (!opts->got_rgsize && (nextaddr - rgaddr) <= maxrgsz)
-			/* Use up gap left by alignment if possible */
-			rgt->length = nextaddr - rgaddr;
-		else
-			rgt->length = rglen;
-
-		/* If the next rg would overflow the device, either shrink it or expand
-		   the current rg to use the remaining space */
-		if (nextaddr + rglen > sdp->device.length) {
-			/* Squeeze the last 1 or 2 rgs into the remaining space */
-			if ((nextaddr < sdp->device.length) && (sdp->device.length - nextaddr >= minrgsz)) {
-				rglen = sdp->device.length - nextaddr;
-			} else {
-				if (sdp->device.length - rgaddr <= maxrgsz)
-					rgt->length = sdp->device.length - rgaddr;
-				else
-					rgt->length = maxrgsz;
-				/* This is the last rg */
-				nextaddr = 0;
-			}
+	memset(rgs, 0, sizeof(*rgs)); /* Initialise *rgs from opts, dev and sdp, starting from all zeroes */
+	if (opts->got_sunit) { /* Stripe geometry from opts: validate it (exit on error), then convert to blocks */
+		if ((opts->sunit % sdp->bsize) != 0) {
+			fprintf(stderr, _("Stripe unit (%lu) must be a multiple of block size (%u)\n"),
+			        opts->sunit, sdp->bsize);
+			exit(1);
+		} else if ((opts->swidth % opts->sunit) != 0) {
+			fprintf(stderr, _("Stripe width (%lu) must be a multiple of stripe unit (%lu)\n"),
+			        opts->swidth, opts->sunit);
+			exit(1);
+		} else {
+			rgs->align = opts->swidth / sdp->bsize; /* stripe width in blocks */
+			rgs->align_off = opts->sunit / sdp->bsize; /* stripe unit in blocks */
 		}
-
-		/* Build the rindex entry */
-		rgt->ri.ri_length = rgblocks2bitblocks(sdp->bsize, rgt->length, &rgt->ri.ri_data);
-		rgt->ri.ri_addr = rgaddr;
-		rgt->ri.ri_data0 = rgaddr + rgt->ri.ri_length;
-		rgt->ri.ri_bitbytes = rgt->ri.ri_data / GFS2_NBBY;
-
-		/* Build the rgrp header */
-		memset(&rgt->rg, 0, sizeof(rgt->rg));
-		rgt->rg.rg_header.mh_magic = GFS2_MAGIC;
-		rgt->rg.rg_header.mh_type = GFS2_METATYPE_RG;
-		rgt->rg.rg_header.mh_format = GFS2_FORMAT_RG;
-		rgt->rg.rg_free = rgt->ri.ri_data;
-
-		if (opts->debug) {
-			gfs2_rindex_print(&rgt->ri);
-			printf(" stripe_offset: %u\n", stripe_offset);
+	} else { /* No stripe options: use the device's I/O sizes, but only if both exceed the physical sector size */
+		if ((dev->minimum_io_size > dev->physical_sector_size) &&
+		    (dev->optimal_io_size > dev->physical_sector_size)) {
+			rgs->align = dev->optimal_io_size / sdp->bsize;
+			rgs->align_off = dev->minimum_io_size / sdp->bsize;
 		}
+	}
+	rgs->bsize = sdp->bsize;
+	rgs->maxrgsz = (GFS2_MAX_RGSIZE << 20) / sdp->bsize;
+	rgs->minrgsz = (GFS2_MIN_RGSIZE << 20) / sdp->bsize;
+	rgs->nextaddr = align_block(sdp->sb_addr + 1, rgs->align, sdp->bsize); /* address of the first rgrp */
+	rgs->rgsize = (sdp->rgsize << 20) / sdp->bsize; /* presumably MB -> blocks; confirm units of sdp->rgsize */
+	rgs->devlen = sdp->device.length;
+	rgs->root = &sdp->rgtree;
+}
 
-		/* TODO: This call allocates buffer heads and bitmap pointers
-		 * in rgt. We really shouldn't need to do that. */
-		err = gfs2_compute_bitstructs(sdp->bsize, rgt);
-		if (err != 0) {
-			fprintf(stderr, _("Could not compute bitmaps. "
-			        "Check resource group and block size options.\n"));
-			return -1;
+static unsigned rgsize_for_data(uint64_t blksreq, unsigned bsize)
+{
+	/* Blocks needed for an rgrp with blksreq data blocks: the data blocks plus their bitmap blocks. */
+	const uint32_t hdr_cap = GFS2_NBBY * (bsize - sizeof(struct gfs2_rgrp)); /* capacity of the 1st block */
+	const uint32_t ext_cap = GFS2_NBBY * (bsize - sizeof(struct gfs2_meta_header)); /* each further block */
+	/* Always one bitmap block; beyond hdr_cap add (excess + ext_cap) / ext_cap more. */
+	unsigned nbitmap = (blksreq > hdr_cap) ? 1 + ((blksreq - hdr_cap) + ext_cap) / ext_cap : 1;
+	return nbitmap + blksreq;
+}
+
+static struct rgrp_tree *rg_append(struct mkfs_rgs *rgs, const struct mkfs_opts *opts, uint64_t freerq)
+{
+	/* Append one rgrp at rgs->nextaddr holding at least freerq data blocks when room allows; returns it, or NULL on error */
+	uint64_t length = rgsize_for_data(freerq, rgs->bsize);
+	struct rgrp_tree *rgt = rgrp_insert(rgs->root, rgs->nextaddr); /* assumes this records the address in rgt->ri.ri_addr -- confirm */
+	if (rgt == NULL)
+		return NULL;
+
+	rgs->curr_offset += rgs->align_off;
+	if (rgs->curr_offset >= rgs->align)
+		rgs->curr_offset = 0;
+
+	if (rgs->rgsize > length)
+		length = rgs->rgsize;
+	/* Step past the size actually chosen, not rgs->rgsize, so a large freerq cannot make the next rg overlap this one */
+	rgs->nextaddr = align_block(rgt->ri.ri_addr + length, rgs->align, rgs->bsize) + rgs->curr_offset;
+	/* Use up gap left by alignment if possible */
+	if (!opts->got_rgsize && ((rgs->nextaddr - rgt->ri.ri_addr) <= rgs->maxrgsz))
+		length = rgs->nextaddr - rgt->ri.ri_addr;
+
+	if ((rgs->nextaddr + rgs->rgsize) > rgs->devlen) {
+		/* Squeeze the last 1 or 2 rgs into the remaining space */
+		if ((rgs->nextaddr < rgs->devlen) && ((rgs->devlen - rgs->nextaddr) >= rgs->minrgsz)) {
+			rgs->rgsize = rgs->devlen - rgs->nextaddr; /* the following rg takes what is left */
+		} else {
+			if (rgs->devlen - rgt->ri.ri_addr <= rgs->maxrgsz)
+				length = rgs->devlen - rgt->ri.ri_addr;
+			else
+				length = rgs->maxrgsz;
+			/* This is the last rg */
+			rgs->nextaddr = 0;
 		}
+	}
+
+	rgt->ri.ri_length = rgblocks2bitblocks(rgs->bsize, length, &rgt->ri.ri_data);
+	rgt->ri.ri_data0 = rgt->ri.ri_addr + rgt->ri.ri_length;
+	rgt->ri.ri_bitbytes = rgt->ri.ri_data / GFS2_NBBY;
+	rgt->rg.rg_header.mh_magic = GFS2_MAGIC;
+	rgt->rg.rg_header.mh_type = GFS2_METATYPE_RG;
+	rgt->rg.rg_header.mh_format = GFS2_FORMAT_RG;
+	rgt->rg.rg_free = rgt->ri.ri_data;
 
-		err = writerg(sdp->device_fd, rgt, sdp->bsize);
+	if (opts->debug) {
+		gfs2_rindex_print(&rgt->ri);
+		printf(" offset: %lu\n", rgs->curr_offset); /* curr_offset is unsigned long, so %lu rather than PRIu64 */
+	}
+
+	/* On failure the rgrp inserted above stays in rgs->root; the caller is expected to give up */
+	if (gfs2_compute_bitstructs(rgs->bsize, rgt) != 0) {
+		fprintf(stderr, _("Could not compute bitmaps. "
+			"Check resource group and block size options.\n"));
+		return NULL;
+	}
+	rgs->blks_total += rgt->ri.ri_data;
+	rgs->count++;
+	return rgt;
+}
+
+static uint64_t place_rgrps(struct mkfs_rgs *rgs, int fd, const struct mkfs_opts *opts, const struct mkfs_dev *dev)
+{
+	int err = 0;
+	struct rgrp_tree *rgt = NULL;
+	/* Append and write rgrps until rg_append() zeroes rgs->nextaddr; returns the block after the last rgrp's data, 0 on failure */
+	while (rgs->nextaddr > 0) {
+		rgt = rg_append(rgs, opts, 0);
+		if (rgt == NULL) {
+			perror(_("Failed to create resource group")); /* NOTE(review): errno may not be set on every rg_append() failure */
+			return 0;
+		}
+		err = writerg(fd, rgt, rgs->bsize);
 		if (err != 0) {
 			perror(_("Failed to write resource group"));
-			return -1;
+			return 0;
 		}
-		sdp->new_rgrps++;
-		sdp->blks_total += rgt->ri.ri_data;
-		rgaddr = nextaddr;
 	}
 
-	sdp->rgrps = sdp->new_rgrps;
-	sdp->fssize = rgt->ri.ri_data0 + rgt->ri.ri_data;
-	return 0;
+	return rgt ? rgt->ri.ri_data0 + rgt->ri.ri_data : 0; /* rgt is NULL if the loop never ran: report failure, don't dereference */
 }
 
 static void sbd_init(struct gfs2_sbd *sdp, struct mkfs_opts *opts, struct mkfs_dev *dev, unsigned bsize)
@@ -793,29 +835,12 @@ static void open_dev(const char *path, struct mkfs_dev *dev)
 		exit(1);
 }
 
-static void opts_set_stripe(struct mkfs_opts *opts, const struct mkfs_dev *dev, unsigned bsize)
-{
-	if (!opts->got_swidth && dev->optimal_io_size > dev->physical_sector_size) {
-		opts->swidth = dev->optimal_io_size;
-		opts->got_swidth = 1;
-	}
-
-	if (!opts->got_sunit && dev->minimum_io_size > dev->physical_sector_size) {
-		opts->sunit = dev->minimum_io_size;
-		opts->got_sunit = 1;
-	}
-
-	if (opts->got_sunit && (opts->sunit % bsize) != 0) {
-		fprintf(stderr, "Stripe unit (%lu) is not a multiple of the block size (%u)\n", opts->sunit, bsize);
-		exit(1);
-	}
-}
-
 void main_mkfs(int argc, char *argv[])
 {
 	struct gfs2_sbd sbd;
 	struct mkfs_opts opts;
 	struct mkfs_dev dev;
+	struct mkfs_rgs rgs;
 	int error;
 	unsigned char uuid[16];
 	unsigned bsize;
@@ -826,17 +851,16 @@ void main_mkfs(int argc, char *argv[])
 
 	open_dev(opts.device, &dev);
 	bsize = choose_blocksize(&opts, &dev);
-	opts_set_stripe(&opts, &dev, bsize);
 
 	if (S_ISREG(dev.stat.st_mode)) {
 		opts.got_bsize = 1; /* Use default block size for regular files */
 	}
 
-	warn_of_destruction(opts.device);
-
 	sbd_init(&sbd, &opts, &dev, bsize);
+	rgs_init(&rgs, &opts, &dev, &sbd);
+
 	if (opts.debug) {
-		printf(_("Calculated file system options:\n"));
+		printf(_("File system options:\n"));
 		printf("  bsize = %u\n", sbd.bsize);
 		printf("  qcsize = %u\n", sbd.qcsize);
 		printf("  jsize = %u\n", sbd.jsize);
@@ -847,20 +871,24 @@ void main_mkfs(int argc, char *argv[])
 		printf("  fssize = %"PRIu64"\n", opts.fssize);
 		printf("  sunit = %lu\n", opts.sunit);
 		printf("  swidth = %lu\n", opts.swidth);
-		printf("  rgrp align = %lu+%lu blocks\n", opts.swidth/sbd.bsize, opts.sunit/sbd.bsize);
+		printf("  rgrp align = %lu+%lu blocks\n", rgs.align, rgs.align_off);
 	}
 
+	warn_of_destruction(opts.device);
+
 	if (opts.confirm && !opts.override)
 		are_you_sure();
 
 	if (!S_ISREG(dev.stat.st_mode) && opts.discard)
-		discard_blocks(dev.fd, sbd.bsize * sbd.device.length, opts.debug);
+		discard_blocks(dev.fd, dev.size, opts.debug);
 
-	error = place_rgrps(&sbd, &opts, &dev);
-	if (error) {
+	sbd.fssize = place_rgrps(&rgs, sbd.device_fd, &opts, &dev);
+	if (sbd.fssize == 0) {
 		fprintf(stderr, _("Failed to build resource groups\n"));
 		exit(1);
 	}
+	sbd.blks_total = rgs.blks_total;
+	sbd.rgrps = rgs.count;
 	build_root(&sbd);
 	build_master(&sbd);
 	error = build_jindex(&sbd);
-- 
1.8.1.4




More information about the Cluster-devel mailing list