[dm-devel] [PATCH] [dm-thin] experimental erase-log patch.

Joe Thornber ejt at redhat.com
Wed Feb 22 14:25:50 UTC 2012


This is just to let me get an idea of the costs involved with
implementing an erase log.
---
 drivers/md/dm-thin-metadata.c |   56 +++++++++++++++++++++++++++++++++++++++++
 drivers/md/dm-thin-metadata.h |    6 ++++
 drivers/md/dm-thin.c          |   38 ++++++++++++++++++++-------
 3 files changed, 90 insertions(+), 10 deletions(-)

diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index f3ba61d..c392068 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -124,6 +124,11 @@ struct thin_disk_superblock {
 	__le32 compat_flags;
 	__le32 compat_ro_flags;
 	__le32 incompat_flags;
+
+	/*
+	 * Holds blocks that will need to be zeroed as part of recovery from a crash.
+	 */
+	__le64 erase_root;
 } __packed;
 
 struct disk_device_details {
@@ -170,11 +175,17 @@ struct dm_pool_metadata {
 	 */
 	struct dm_btree_info details_info;
 
+	/*
+	 * Blocks that need erasing on recovery.
+	 */
+	struct dm_btree_info erase_info;
+
 	struct rw_semaphore root_lock;
 	uint32_t time;
 	int need_commit;
 	dm_block_t root;
 	dm_block_t details_root;
+	dm_block_t erase_root;
 	struct list_head thin_devices;
 	uint64_t trans_id;
 	unsigned long flags;
@@ -465,6 +476,14 @@ static int init_pmd(struct dm_pool_metadata *pmd,
 	pmd->details_info.value_type.dec = NULL;
 	pmd->details_info.value_type.equal = NULL;
 
+	pmd->erase_info.tm = tm;
+	pmd->erase_info.levels = 1;
+	pmd->erase_info.value_type.context = NULL;
+	pmd->erase_info.value_type.size = sizeof(__le64);
+	pmd->erase_info.value_type.inc = NULL;
+	pmd->erase_info.value_type.dec = NULL;
+	pmd->erase_info.value_type.equal = NULL;
+
 	pmd->root = 0;
 
 	init_rwsem(&pmd->root_lock);
@@ -735,6 +754,12 @@ struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev,
 		goto bad;
 	}
 
+	r = dm_btree_empty(&pmd->erase_info, &pmd->erase_root);
+	if (r < 0) {
+		DMERR("couldn't create erase journal");
+		goto bad;
+	}
+
 	pmd->flags = 0;
 	pmd->need_commit = 1;
 	r = dm_pool_commit_metadata(pmd);
@@ -1332,6 +1357,37 @@ int dm_pool_alloc_data_block(struct dm_pool_metadata *pmd, dm_block_t *result)
 	return r;
 }
 
+int dm_pool_mark_erase(struct dm_pool_metadata *pmd, dm_block_t b)
+{
+	int r;
+	uint64_t key = b;
+	__le64 value = cpu_to_le64(pmd->time);
+
+	down_write(&pmd->root_lock);
+	r = dm_btree_insert(&pmd->erase_info, pmd->erase_root,
+			    &key, &value, &pmd->erase_root);
+	if (!r)
+		pmd->need_commit = 1;
+	up_write(&pmd->root_lock);
+
+	return r;
+}
+
+int dm_pool_clear_erase(struct dm_pool_metadata *pmd, dm_block_t b)
+{
+	int r;
+	uint64_t key = b;
+
+	down_write(&pmd->root_lock);
+	r = dm_btree_remove(&pmd->erase_info, pmd->erase_root,
+			    &key, &pmd->erase_root);
+	if (!r)
+		pmd->need_commit = 1;
+	up_write(&pmd->root_lock);
+
+	return r;
+}
+
 int dm_pool_commit_metadata(struct dm_pool_metadata *pmd)
 {
 	int r;
diff --git a/drivers/md/dm-thin-metadata.h b/drivers/md/dm-thin-metadata.h
index cfc7d0b..42a4268 100644
--- a/drivers/md/dm-thin-metadata.h
+++ b/drivers/md/dm-thin-metadata.h
@@ -126,6 +126,12 @@ int dm_thin_insert_block(struct dm_thin_device *td, dm_block_t block,
 int dm_thin_remove_block(struct dm_thin_device *td, dm_block_t block);
 
 /*
+ * Erase log
+ */
+int dm_pool_mark_erase(struct dm_pool_metadata *pmd, dm_block_t b);
+int dm_pool_clear_erase(struct dm_pool_metadata *pmd, dm_block_t b);
+
+/*
  * Queries.
  */
 int dm_thin_get_highest_mapped_block(struct dm_thin_device *td,
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 0da0db2..7536db1 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -539,6 +539,7 @@ struct pool {
 	struct bio_list deferred_flush_bios;
 	struct list_head prepared_mappings;
 	struct list_head prepared_discards;
+	struct list_head copy_jobs;
 
 	struct bio_list retry_on_resume_list;
 
@@ -809,14 +810,6 @@ static void overwrite_endio(struct bio *bio, int err)
 /*----------------------------------------------------------------*/
 
 /*
- * Workqueue.
- */
-
-/*
- * Prepared mapping jobs.
- */
-
-/*
  * This sends the bios in the cell back to the deferred_bios list.
  */
 static void cell_defer(struct thin_c *tc, struct cell *cell,
@@ -878,6 +871,13 @@ static void process_prepared_mapping(struct new_mapping *m)
 		return;
 	}
 
+	r = dm_pool_clear_erase(tc->pool->pmd, m->data_block);
+	if (r) {
+		DMERR("dm_pool_clear_erase() failed");
+		cell_error(m->cell);
+		return;
+	}
+
 	/*
 	 * Release any bios held while the block was being provisioned.
 	 * If we are processing a write bio that completely covers the block,
@@ -996,6 +996,13 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
 	if (!ds_add_work(&pool->shared_read_ds, &m->list))
 		m->quiesced = 1;
 
+	r = dm_pool_mark_erase(pool->pmd, data_dest);
+	if (r) {
+		mempool_free(m, pool->mapping_pool);
+		DMERR("dm_kcopyd_copy() failed");
+		cell_error(cell);
+	}
+
 	/*
 	 * IO to pool_dev remaps to the pool target's data_dev.
 	 *
@@ -1007,8 +1014,15 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
 		h->overwrite_mapping = m;
 		m->bio = bio;
 		save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
-		remap_and_issue(tc, bio, data_dest);
+		remap(tc, bio, data_dest);
+		bio_list_add(&pool->deferred_flush_bios, bio);
+
 	} else {
+		/*
+		 * FIXME: this shouldn't be done until after the commit of
+		 * the erase state change.  No point doing it now, for this
+		 * little experiment.  Just use small block sizes.
+		 */
 		struct dm_io_region from, to;
 
 		from.bdev = origin->bdev;
@@ -1062,6 +1076,8 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
 	m->err = 0;
 	m->bio = NULL;
 
+	dm_pool_mark_erase(pool->pmd, data_block);
+
 	/*
 	 * If the whole block of data is being overwritten or we are not
 	 * zeroing pre-existing data, we can issue the bio immediately.
@@ -1075,7 +1091,8 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
 		h->overwrite_mapping = m;
 		m->bio = bio;
 		save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
-		remap_and_issue(tc, bio, data_block);
+		remap(tc, bio, data_block);
+		bio_list_add(&pool->deferred_flush_bios, bio);
 
 	} else {
 		int r;
@@ -1087,6 +1104,7 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
 
 		r = dm_kcopyd_zero(pool->copier, 1, &to, 0, copy_complete, m);
 		if (r < 0) {
+			dm_pool_clear_erase(pool->pmd, data_block);
 			mempool_free(m, pool->mapping_pool);
 			DMERR("dm_kcopyd_zero() failed");
 			cell_error(cell);
-- 
1.7.5.4




More information about the dm-devel mailing list