[dm-devel] Re: [PATCH] device-mapper: Bad Block relocation target

Andrew Morton akpm at linux-foundation.org
Sat Feb 10 01:23:04 UTC 2007


On Sat, 10 Feb 2007 01:01:57 +0000 (GMT)
Daniel Drake <dsd at gentoo.org> wrote:

> The BBR target is designed to remap I/O write failures to another safe
> location on disk. Note that most disk drives have BBR built into them,
> this means that our software BBR will be only activated when all hardware
> BBR replacement sectors have been used.
> 
> This patch is included in the EVMS tarball, and Gentoo have been shipping
> it as part of the default kernel for a long time. I know that some users
> are running it, and haven't seen any reported bugs in a long time.
> 
> Please consider this for -mm and possible later inclusion in mainline.
> 
> I haven't been involved in the development of this target, but have brushed
> it up a little from the one included in the EVMS tarball.
> 
> Kevin, I see from mailing list archives that you were somehow involved in
> this project. Can you clarify who the main developers were so that we can
> include this in the history if it gets merged?
>

Some minorish things..
 
> +static struct workqueue_struct *dm_bbr_wq = NULL;

Unneeded initialisation.

> +static struct bbr_private *bbr_alloc_private(void)
> +{
> +	struct bbr_private *bbr_id;
> +
> +	bbr_id = kmalloc(sizeof(*bbr_id), GFP_KERNEL);
> +	if (bbr_id) {
> +		memset(bbr_id, 0, sizeof(*bbr_id));

kzalloc.

> +		INIT_WORK(&bbr_id->remap_work, bbr_remap_handler, bbr_id);
> +		bbr_id->remap_root_lock = SPIN_LOCK_UNLOCKED;
> +		bbr_id->remap_ios_lock = SPIN_LOCK_UNLOCKED;

This initialisation will confound lockdep.  Please use spin_lock_init().

Then please test this code under lockdep ;)

> +		bbr_id->in_use_replacement_blks = (atomic_t)ATOMIC_INIT(0);
> +	}
> +
> +	return bbr_id;
> +}
> +
> +static void bbr_free_private(struct bbr_private *bbr_id)
> +{
> +	if (bbr_id->bbr_table) {
> +		vfree(bbr_id->bbr_table);
> +	}

unneeded braces.

> +	bbr_free_remap(bbr_id);
> +	kfree(bbr_id);
> +}
> +
> +static u32 crc_table[256];
> +static u32 crc_table_built = 0;

unneeded initialisation.

> +static void build_crc_table(void)
> +{
> +	u32 i, j, crc;
> +
> +	for (i = 0; i <= 255; i++) {
> +		crc = i;
> +		for (j = 8; j > 0; j--) {
> +			if (crc & 1)
> +				crc = (crc >> 1) ^ CRC_POLYNOMIAL;
> +			else
> +				crc >>= 1;
> +		}
> +		crc_table[i] = crc;
> +	}
> +	crc_table_built = 1;
> +}
> +
> +static u32 calculate_crc(u32 crc, void *buffer, u32 buffersize)
> +{
> +	unsigned char *current_byte;
> +	u32 temp1, temp2, i;
> +
> +	current_byte = (unsigned char *) buffer;
> +	/* Make sure the crc table is available */
> +	if (!crc_table_built)
> +		build_crc_table();
> +	/* Process each byte in the buffer. */
> +	for (i = 0; i < buffersize; i++) {
> +		temp1 = (crc >> 8) & 0x00FFFFFF;
> +		temp2 = crc_table[(crc ^ (u32) * current_byte) &
> +				  (u32) 0xff];
> +		current_byte++;
> +		crc = temp1 ^ temp2;
> +	}
> +	return crc;
> +}

Can't this use the kernel's crc library functions?

> +
> +/**
> + * bbr_table_to_remap_list
> + *
> + * The on-disk bbr table is sorted by the replacement sector LBA. In order to
> + * improve run time performance, the in memory remap list must be sorted by
> + * the bad sector LBA. This function is called at discovery time to initialize
> + * the remap list. This function assumes that at least one copy of meta data
> + * is valid.
> + **/
> +static u32 bbr_table_to_remap_list(struct bbr_private *bbr_id)
> +{
> +	u32 in_use_blks = 0;
> +	int i, j;
> +	struct bbr_table *p;
> +
> +	for (i = 0, p = bbr_id->bbr_table;
> +	     i < bbr_id->nr_sects_bbr_table;
> +	     i++, p++) {
> +		if (!p->in_use_cnt) {
> +			break;
> +		}

unneeded braces

> +		in_use_blks += p->in_use_cnt;
> +		for (j = 0; j < p->in_use_cnt; j++) {
> +			bbr_insert_remap_entry(bbr_id, &p->entries[j]);
> +		}

more

> +	}
> +	if (in_use_blks) {
> +		char b[32];
> +		DMWARN("dm-bbr: There are %u BBR entries for device %s",
> +		       in_use_blks, format_dev_t(b, bbr_id->dev->bdev->bd_dev));
> +	}
> +
> +	return in_use_blks;
> +}
> +
> +/**
> + * bbr_search_remap_entry
> + *
> + * Search remap entry for the specified sector. If found, return a pointer to
> + * the table entry. Otherwise, return NULL.
> + **/
> +static struct bbr_table_entry *bbr_search_remap_entry(
> +	struct bbr_private *bbr_id,
> +	u64 lsn)
> +{
> +	struct bbr_runtime_remap *p;
> +
> +	spin_lock_irq(&bbr_id->remap_root_lock);
> +	p = bbr_binary_search(bbr_id->remap_root, lsn);
> +	spin_unlock_irq(&bbr_id->remap_root_lock);
> +	if (p) {
> +		return (&p->remap);
> +	} else {
> +		return NULL;
> +	}

more.

> +}
> +
> +/**
> + * bbr_remap
> + *
> + * If *lsn is in the remap table, return TRUE and modify *lsn,
> + * else, return FALSE.
> + **/
> +static inline int bbr_remap(struct bbr_private *bbr_id,
> +			    u64 *lsn)

lsn is a sector number?  Is u64 the appropriate type to be using?  Why not
the more efficient sector_t?

> +
> +/**
> + * bbr_remap_probe
> + *
> + * If any of the sectors in the range [lsn, lsn+nr_sects] are in the remap
> + * table return TRUE, Else, return FALSE.
> + **/
> +static inline int bbr_remap_probe(struct bbr_private *bbr_id,
> +				  u64 lsn, u64 nr_sects)

And here (everywhere)

> +{
> +	u64 tmp, cnt;
> +
> +	if (atomic_read(&bbr_id->in_use_replacement_blks)) {
> +		for (cnt = 0, tmp = lsn;
> +		     cnt < nr_sects;
> +		     cnt += bbr_id->blksize_in_sects, tmp = lsn + cnt) {
> +			if (bbr_remap(bbr_id,&tmp)) {
> +				return 1;
> +			}

braces.  (everywhere)

> +			DMWARN("dm-bbr: device %s: Trying to remap bad sector "PFU64" to sector "PFU64,

80-cols, please.

> +	/* KMC: Is this the right way to walk the bvec list? */

bio_for_each_segment()?

> +	for (i = 0;
> +	     i < bio->bi_vcnt;
> +	     i++, bio->bi_idx++, starting_lsn += count) {
> +
> +		/* Bvec info: number of sectors, page,
> +		 * and byte-offset within page.
> +		 */
> +		count = bio_iovec(bio)->bv_len >> SECTOR_SHIFT;
> +		pl.page = bio_iovec(bio)->bv_page;
> +		offset = bio_iovec(bio)->bv_offset;
> +
> +
>
> ...
>
> +int __init dm_bbr_init(void)

static?

> +{
> +	int rc;
> +
> +	rc = dm_register_target(&bbr_target);
> +	if (rc) {
> +		DMERR("dm-bbr: error registering target.");
> +		goto err1;
> +	}
> +
> +	bbr_remap_cache = kmem_cache_create("bbr-remap",
> +					    sizeof(struct bbr_runtime_remap),
> +					    0, SLAB_HWCACHE_ALIGN, NULL, NULL);

SLAB_HWCACHE_ALIGN uses extra space.  Is it really needed?

> +	if (!bbr_remap_cache) {
> +		DMERR("dm-bbr: error creating remap cache.");
> +		rc = ENOMEM;
> +		goto err2;
> +	}
> +
> +	bbr_io_cache = kmem_cache_create("bbr-io", sizeof(struct dm_bio_details),
> +					 0, SLAB_HWCACHE_ALIGN, NULL, NULL);

ditto

> +	if (!bbr_io_cache) {
> +		DMERR("dm-bbr: error creating io cache.");
> +		rc = ENOMEM;
> +		goto err3;
> +	}
> +
> +	bbr_io_pool = mempool_create(256, mempool_alloc_slab,
> +				     mempool_free_slab, bbr_io_cache);
> +	if (!bbr_io_pool) {
> +		DMERR("dm-bbr: error creating io mempool.");
> +		rc = ENOMEM;
> +		goto err4;
> +	}
> +
> +	dm_bbr_wq = create_workqueue("dm-bbr");

Could this be a single-threaded workqueue?

> +void __exit dm_bbr_exit(void)

static?

> +{
> +	dm_io_put(1);
> +	destroy_workqueue(dm_bbr_wq);
> +	mempool_destroy(bbr_io_pool);
> +	kmem_cache_destroy(bbr_io_cache);
> +	kmem_cache_destroy(bbr_remap_cache);
> +	dm_unregister_target(&bbr_target);
> +}
> +
> +module_init(dm_bbr_init);
> +module_exit(dm_bbr_exit);
> +MODULE_LICENSE("GPL");
>
> ...
>
> +#if BITS_PER_LONG > 32
> +#define PFU64 "%lu"
> +#else
> +#define PFU64 "%Lu"
> +#endif

This gets ugly.  And I expect it'll generate a warning storm on powerpc,
where u64 is `unsigned long', not `unsigned long long'.  Please just use
%Lu and typecast the argument to `unsigned long long' at each callsite.

> +/**
> + * struct bbr_table_entry
> + * @bad_sect:		LBA of bad location.
> + * @replacement_sect:	LBA of new location.
> + *
> + * Structure to describe one BBR remap.
> + **/
> +struct bbr_table_entry {
> +	u64 bad_sect;
> +	u64 replacement_sect;
> +};

sector_t?





More information about the dm-devel mailing list