[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

[dm-devel] [PATCH 1/3]: region based notifications for dm-io



This patch adds the new interface to dm-io.

Signed-off-by: Stefan Bader <shbader de ibm com>

---
drivers/md/dm-io.c |  614 +++++++++++++++++++++++++++++++++++++----------------
drivers/md/dm-io.h |  121 ++++++++--
2 files changed, 529 insertions(+), 206 deletions(-)
In some cases it is desirable to have notifications sent to the user of the
dm-io interface whenever one of the target regions has been processed or (in
the future) has something to report.
This requires some changes to the dm-io implementation as well as the
interface.

Signed-off-by: Stefan Bader <shbader de ibm com>

---
 drivers/md/dm-io.c |  614 +++++++++++++++++++++++++++++++++++++----------------
 drivers/md/dm-io.h |  121 ++++++++--
 2 files changed, 529 insertions(+), 206 deletions(-)

Index: linux-2.6.22-rc2/drivers/md/dm-io.c
===================================================================
--- linux-2.6.22-rc2.orig/drivers/md/dm-io.c
+++ linux-2.6.22-rc2/drivers/md/dm-io.c
@@ -12,144 +12,399 @@
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/interrupt.h>
+
+/*
+ * These are internally used structures that are used for handling I/O requests.
+ */
+struct notify_list {
+	struct sub_io *	head;
+	struct sub_io *	tail;
+	spinlock_t	lock;
+};
 
 struct dm_io_client {
-	mempool_t *pool;
-	struct bio_set *bios;
+	atomic_t		ref_count;
+
+	mempool_t *		io_pool;
+	mempool_t *		sio_pool;
+	struct bio_set *	bios;
+
+	struct tasklet_struct	notify;
+
+	/*
+	 * list of finished sub_ios waiting for notification
+	 * using single linked list for saving memory in sub_io
+	 */
+	struct notify_list	notify_list;
 };
 
-/* FIXME: can we shrink this ? */
+/*
+ * This structure will hold all information about the currently running
+ * requests.
+ */
 struct io {
-	unsigned long error;
-	atomic_t count;
-	struct task_struct *sleeper;
-	struct dm_io_client *client;
-	io_notify_fn callback;
-	void *context;
+	struct dm_io_request_new	req;		/* Must be first */
+
+	struct dm_io_client *		client;
+	atomic_t			ref_count;
+
+	/* simple list of generated sub_ios for io */
+	struct sub_io *			sub_ios;
+	atomic_t			io_count;
+
+	io_notify_fn			old_fn;		/* Will go */
+	void *				old_context;	/* Will go */
+
+	/* FIXME: This might be unnecessary. */
+	unsigned			num_regions;
+};
+
+/*
+ * This structure tracks I/O to a single region.
+ */
+struct sub_io {
+	struct io *			io;
+	atomic_t			ref_count;
+
+	spinlock_t			lock;
+	struct sub_io *			sio_next;
+	struct sub_io *			notify_next;
+
+	struct dm_io_region_state	state;
 };
 
 /*
- * io contexts are only dynamically allocated for asynchronous
- * io.  Since async io is likely to be the majority of io we'll
- * have the same number of io contexts as bios! (FIXME: must reduce this).
+ * sub_io notify list helpers
  */
+static void notify_list_init(struct notify_list *nl)
+{
+	spin_lock_init(&nl->lock);
+	nl->head = nl->tail = NULL;
+}
+
+static void notify_list_push(struct notify_list *nl, struct sub_io *sio)
+{
+	unsigned long	flags;
+
+	spin_lock_irqsave(&nl->lock, flags);
+	if (!nl->head)
+		nl->head = sio;
+	if (nl->tail)
+		nl->tail->notify_next = sio;
+	nl->tail = sio;
+	nl->tail->notify_next = NULL;
+	spin_unlock_irqrestore(&nl->lock, flags);
+}
 
-static unsigned int pages_to_ios(unsigned int pages)
+static struct sub_io *notify_list_pop(struct notify_list *nl)
 {
-	return 4 * pages;	/* too many ? */
+	struct sub_io *	sio;
+	unsigned long	flags;
+
+	spin_lock_irqsave(&nl->lock, flags);
+	sio = nl->head;
+	nl->head = sio ? sio->notify_next : NULL;
+	if (!nl->head)
+		nl->tail = NULL;
+	if (sio)
+		sio->notify_next = NULL;
+	spin_unlock_irqrestore(&nl->lock, flags);
+
+	return sio;
 }
 
+
 /*
  * Create a client with mempool and bioset.
  */
-struct dm_io_client *dm_io_client_create(unsigned num_pages)
+static void io_notification_handler(unsigned long data);
+struct dm_io_client *dm_io_client_create_new(unsigned min_ios,
+	unsigned max_regions)
 {
-	unsigned ios = pages_to_ios(num_pages);
-	struct dm_io_client *client;
+	struct dm_io_client *	client;
+	unsigned		min_sios = min_ios * max_regions;
 
 	client = kmalloc(sizeof(*client), GFP_KERNEL);
 	if (!client)
 		return ERR_PTR(-ENOMEM);
 
-	client->pool = mempool_create_kmalloc_pool(ios, sizeof(struct io));
-	if (!client->pool)
-		goto bad;
+	client->io_pool = mempool_create_kzalloc_pool(
+				min_ios, sizeof(struct io));
+	if (!client->io_pool)
+		goto io_pool_failed;
+
+	client->sio_pool = mempool_create_kzalloc_pool(
+				min_sios, sizeof(struct sub_io));
+	if (!client->sio_pool)
+		goto sio_pool_failed;
 
-	client->bios = bioset_create(16, 16);
+	client->bios = bioset_create(min_sios, min_sios);
 	if (!client->bios)
-		goto bad;
+		goto bioset_failed;
+
+	tasklet_init(&client->notify, io_notification_handler,
+			(unsigned long) client);
+	notify_list_init(&client->notify_list);
+
+	atomic_set(&client->ref_count, 1);
 
 	return client;
 
-   bad:
-	if (client->pool)
-		mempool_destroy(client->pool);
+    bioset_failed:
+	mempool_destroy(client->sio_pool);
+
+    sio_pool_failed:
+	mempool_destroy(client->io_pool);
+
+    io_pool_failed:
 	kfree(client);
 	return ERR_PTR(-ENOMEM);
 }
+EXPORT_SYMBOL_GPL(dm_io_client_create_new);
+struct dm_io_client *dm_io_client_create(unsigned num_pages) {
+	return dm_io_client_create_new(num_pages * 4, 16);
+}
 EXPORT_SYMBOL(dm_io_client_create);
 
+int dm_io_client_resize_new(struct dm_io_client *client, unsigned min_ios,
+	unsigned max_regions)
+{
+	unsigned	min_sios = min_ios * max_regions;
+	int		rc;
+
+	/*
+	 * FIXME: There is no bio_resize_bioset(). The only way to change
+	 *        it is to replace it. But that requires no in-flight ios.
+	 */
+	rc = mempool_resize(client->sio_pool, min_sios, GFP_KERNEL);
+	if (rc)
+		return rc;
+
+	rc = mempool_resize(client->io_pool, min_ios, GFP_KERNEL);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(dm_io_client_resize_new);
 int dm_io_client_resize(unsigned num_pages, struct dm_io_client *client)
 {
-	return mempool_resize(client->pool, pages_to_ios(num_pages),
-			      GFP_KERNEL);
+	return dm_io_client_resize_new(client, num_pages * 4, 16);
 }
 EXPORT_SYMBOL(dm_io_client_resize);
 
+static struct dm_io_client *dm_io_client_get(struct dm_io_client *client)
+{
+	atomic_inc(&client->ref_count);
+
+	return client;
+}
+
 void dm_io_client_destroy(struct dm_io_client *client)
 {
-	mempool_destroy(client->pool);
-	bioset_free(client->bios);
-	kfree(client);
+	if (!client)
+		return;
+
+	if (atomic_dec_and_test(&client->ref_count)) {
+		tasklet_disable_nosync(&client->notify);
+		bioset_free(client->bios);
+		mempool_destroy(client->sio_pool);
+		mempool_destroy(client->io_pool);
+		kfree(client);
+	}
 }
 EXPORT_SYMBOL(dm_io_client_destroy);
 
-/*-----------------------------------------------------------------
- * We need to keep track of which region a bio is doing io for.
- * In order to save a memory allocation we store this the last
- * bvec which we know is unused (blech).
- * XXX This is ugly and can OOPS with some configs... find another way.
- *---------------------------------------------------------------*/
-static inline void bio_set_region(struct bio *bio, unsigned region)
+/*
+ * The following definition is for making the code internally more clear.
+ * The external interface is to pair *_create() and *_destroy() calls but
+ * calling *_destroy() is rather a *_put() call since it will release all
+ * memory only if there is no more reference to the client.
+ */
+static void dm_io_client_put(struct dm_io_client *client)
 {
-	bio->bi_io_vec[bio->bi_max_vecs].bv_len = region;
+	dm_io_client_destroy(client);
 }
 
-static inline unsigned bio_get_region(struct bio *bio)
+struct dm_io_request_new *dm_io_request_create(struct dm_io_client *client,
+	gfp_t gfp_mask)
 {
-	return bio->bi_io_vec[bio->bi_max_vecs].bv_len;
+	struct io *	io;
+
+	io = mempool_alloc(client->io_pool, gfp_mask);
+	if (!io)
+		return ERR_PTR(-ENOMEM);
+
+	io->client = dm_io_client_get(client);
+	atomic_set(&io->ref_count, 1);
+	atomic_set(&io->io_count, 0);
+
+	io->sub_ios = NULL;
+
+	return (struct dm_io_request_new *) io;
 }
+EXPORT_SYMBOL_GPL(dm_io_request_create);
 
-/*-----------------------------------------------------------------
- * We need an io object to keep track of the number of bios that
- * have been dispatched for a particular io.
- *---------------------------------------------------------------*/
-static void dec_count(struct io *io, unsigned int region, int error)
+static struct io *io_get(struct io *io)
+{
+	atomic_inc(&io->ref_count);
+
+	return io;
+}
+struct dm_io_request_new *dm_io_request_get(struct dm_io_request_new *io_req)
+{
+	return (struct dm_io_request_new *) io_get((struct io *) io_req);
+}
+EXPORT_SYMBOL(dm_io_request_get);
+
+static void io_put(struct io *io)
 {
-	if (error)
-		set_bit(region, &io->error);
+	if (!io)
+		return;
 
-	if (atomic_dec_and_test(&io->count)) {
-		if (io->sleeper)
-			wake_up_process(io->sleeper);
-
-		else {
-			int r = io->error;
-			io_notify_fn fn = io->callback;
-			void *context = io->context;
+	if (atomic_dec_and_test(&io->ref_count)) {
+		struct dm_io_client *	client = io->client;
+		struct sub_io *		sio;
 
-			mempool_free(io, io->client->pool);
-			fn(r, context);
+		while (io->sub_ios) {
+			sio = io->sub_ios;
+			io->sub_ios = sio->sio_next;
+			BUG_ON(sio->io);
+			BUG_ON(atomic_read(&sio->ref_count));
+			mempool_free(sio, client->sio_pool);
 		}
+
+		mempool_free(io, client->io_pool);
+		dm_io_client_put(client);
+	}
+}
+void dm_io_request_put(struct dm_io_request_new *io_req)
+{
+	io_put((struct io *) io_req);
+}
+EXPORT_SYMBOL_GPL(dm_io_request_put);
+
+struct sub_io *sub_io_create(struct io *io, gfp_t gfp_mask)
+{
+	struct sub_io *	sio;
+
+	sio = mempool_alloc(io->client->sio_pool, gfp_mask);
+	if (!sio)
+		return ERR_PTR(-ENOMEM);
+
+	sio->io = io_get(io);
+	atomic_set(&sio->ref_count, 1);
+	spin_lock_init(&sio->lock);
+
+	sio->sio_next = NULL;
+	sio->notify_next = NULL;
+	sio->state.status = DM_IO_PENDING;
+
+	return sio;
+}
+
+struct sub_io *sub_io_get(struct sub_io *sio)
+{
+	atomic_inc(&sio->ref_count);
+
+	return sio;
+}
+
+void sub_io_put(struct sub_io *sio)
+{
+	if (atomic_dec_and_test(&sio->ref_count)) {
+		struct io *	io = sio->io;
+
+		sio->io = NULL;
+		io_put(io);
+	}
+}
+
+
+/*
+ * I/O callbacks. These are called for every I/O or region that has something
+ * to report.
+ */
+static unsigned long io_get_bits(struct io *io)
+{
+	struct sub_io *	sio;
+	unsigned long	bits = 0;
+
+	for (sio = io->sub_ios; sio; sio = sio->sio_next)
+		if (sio->state.status != DM_IO_OK)
+			bits |= (1 << sio->state.region);
+
+	return bits;
+}
+
+static void call_request_notify(struct io *io, struct dm_io_notify_data *nd)
+{
+	if (io->req.notify.fn && io->req.notify.type & DM_IO_NOTIFY_REQUEST) {
+		nd->type       = DM_IO_NOTIFY_REQUEST;
+		nd->error_bits = io_get_bits(io);
+		nd->context    = io->req.notify.context;
+		io->req.notify.fn(nd);
+	}
+}
+
+static void call_region_notify(struct sub_io *sio, struct dm_io_notify_data *nd)
+{
+	if (sio->io->req.notify.fn) {
+		if (sio->io->req.notify.type & DM_IO_NOTIFY_REGION) {
+			nd->type = DM_IO_NOTIFY_REGION;
+			//spin_lock(&sio->lock);
+			nd->context = sio->io->req.notify.context;
+			nd->state   = sio->state;
+			//spin_unlock(&sio->lock);
+			sio->io->req.notify.fn(nd);
+		}
+	}
+	if (atomic_dec_and_test(&sio->io->io_count))
+		call_request_notify(sio->io, nd);
+}
+
+static void io_notification_handler(unsigned long data)
+{
+	struct dm_io_client *		client = (struct dm_io_client *) data;
+	struct sub_io *			sio;
+	struct dm_io_notify_data	nd;
+
+	while ((sio = notify_list_pop(&client->notify_list))) {
+		call_region_notify(sio, &nd);
+		sub_io_put(sio);
 	}
 }
 
 static int endio(struct bio *bio, unsigned int done, int error)
 {
-	struct io *io;
-	unsigned region;
+	struct sub_io *		sio	= bio->bi_private;
+	struct dm_io_client *	client	= sio->io->client;
+	unsigned long		flags;
 
 	/* keep going until we've finished */
 	if (bio->bi_size)
 		return 1;
 
+	spin_lock_irqsave(&sio->lock, flags);
+
+	/* FIXME: really need this ? */
 	if (error && bio_data_dir(bio) == READ)
 		zero_fill_bio(bio);
 
-	/*
-	 * The bio destructor in bio_put() may use the io object.
-	 */
-	io = bio->bi_private;
-	region = bio_get_region(bio);
+	sio->state.status = error ? DM_IO_ERROR : DM_IO_OK;
+	sio->state.error_code = error;
+	spin_unlock_irqrestore(&sio->lock, flags);
 
-	bio->bi_max_vecs++;
-	bio_put(bio);
+	/* Add to list for tasklet processing */
+	notify_list_push(&client->notify_list, sub_io_get(sio));
 
-	dec_count(io, region, error);
+	tasklet_schedule(&client->notify);
+
+	bio_put(bio);
 
 	return 0;
 }
 
+
 /*-----------------------------------------------------------------
  * These little objects provide an abstraction for getting a new
  * destination page for io.
@@ -244,9 +499,11 @@ static void vm_dp_init(struct dpages *dp
 
 static void dm_bio_destructor(struct bio *bio)
 {
-	struct io *io = bio->bi_private;
+	struct sub_io *	sio = bio->bi_private;
 
-	bio_free(bio, io->client->bios);
+	bio->bi_private = NULL;
+	bio_free(bio, sio->io->client->bios);
+	sub_io_put(sio);
 }
 
 /*
@@ -274,160 +531,128 @@ static void km_dp_init(struct dpages *dp
 	dp->context_ptr = data;
 }
 
-/*-----------------------------------------------------------------
- * IO routines that accept a list of pages.
- *---------------------------------------------------------------*/
-static void do_region(int rw, unsigned int region, struct io_region *where,
-		      struct dpages *dp, struct io *io)
+static int dp_init(struct dm_io_memory *mem, struct dpages *dp);
+static void sub_io_submit(struct sub_io *sio, struct dm_io_region *region)
 {
-	struct bio *bio;
-	struct page *page;
-	unsigned long len;
-	unsigned offset;
-	unsigned num_bvecs;
-	sector_t remaining = where->count;
+	struct io *	io		= sio->io;
+	sector_t	remaining	= region->count;
+	struct bio *	bio;
+	struct page *	page;
+	struct dpages	dp;
+	unsigned long	len;
+	unsigned	offset;
+	unsigned	num_bvecs;
+
+	dp_init(&io->req.mem, &dp);
 
 	while (remaining) {
-		/*
-		 * Allocate a suitably sized-bio: we add an extra
-		 * bvec for bio_get/set_region() and decrement bi_max_vecs
-		 * to hide it from bio_add_page().
-		 */
-		num_bvecs = (remaining / (PAGE_SIZE >> SECTOR_SHIFT)) + 2;
+		num_bvecs = (remaining / (PAGE_SIZE >> SECTOR_SHIFT)) + 1;
 		bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, io->client->bios);
-		bio->bi_sector = where->sector + (where->count - remaining);
-		bio->bi_bdev = where->bdev;
+		bio->bi_sector = region->sector + (region->count - remaining);
+		bio->bi_bdev = region->bdev;
 		bio->bi_end_io = endio;
-		bio->bi_private = io;
+		bio->bi_private = sub_io_get(sio);
 		bio->bi_destructor = dm_bio_destructor;
-		bio->bi_max_vecs--;
-		bio_set_region(bio, region);
 
 		/*
 		 * Try and add as many pages as possible.
 		 */
 		while (remaining) {
-			dp->get_page(dp, &page, &len, &offset);
+			dp.get_page(&dp, &page, &len, &offset);
 			len = min(len, to_bytes(remaining));
 			if (!bio_add_page(bio, page, len, offset))
 				break;
 
 			offset = 0;
 			remaining -= to_sector(len);
-			dp->next_page(dp);
+			dp.next_page(&dp);
 		}
 
-		atomic_inc(&io->count);
-		submit_bio(rw, bio);
-	}
-}
-
-static void dispatch_io(int rw, unsigned int num_regions,
-			struct io_region *where, struct dpages *dp,
-			struct io *io, int sync)
-{
-	int i;
-	struct dpages old_pages = *dp;
-
-	if (sync)
-		rw |= (1 << BIO_RW_SYNC);
-
-	/*
-	 * For multiple regions we need to be careful to rewind
-	 * the dp object for each call to do_region.
-	 */
-	for (i = 0; i < num_regions; i++) {
-		*dp = old_pages;
-		if (where[i].count)
-			do_region(rw, i, where + i, dp, io);
+		sio->state.status = DM_IO_RUNNING;
+		submit_bio(io->req.rw, bio);
 	}
-
-	/*
-	 * Drop the extra reference that we were holding to avoid
-	 * the io being completed too early.
-	 */
-	dec_count(io, 0, 0);
 }
 
-static int sync_io(struct dm_io_client *client, unsigned int num_regions,
-		   struct io_region *where, int rw, struct dpages *dp,
-		   unsigned long *error_bits)
+int dm_io_request_submit(struct dm_io_request_new *io_req,
+	unsigned num_regions, struct dm_io_region *regions)
 {
-	struct io io;
+	struct io *	io	= (struct io *) io_req;
+	struct sub_io *	sio;
+	struct sub_io *	sio_last;
+	int		i;
 
-	if (num_regions > 1 && rw != WRITE) {
+	if (num_regions > 1 && (io_req->rw & RW_MASK) != WRITE) {
 		WARN_ON(1);
-		return -EIO;
+		return -EINVAL;
 	}
 
-	io.error = 0;
-	atomic_set(&io.count, 1); /* see dispatch_io() */
-	io.sleeper = current;
-	io.client = client;
-
-	dispatch_io(rw, num_regions, where, dp, &io, 1);
-
-	while (1) {
-		set_current_state(TASK_UNINTERRUPTIBLE);
+	io->num_regions = num_regions;
+	atomic_set(&io->io_count, num_regions);
 
-		if (!atomic_read(&io.count) || signal_pending(current))
-			break;
+	BUG_ON(io->sub_ios);
 
-		io_schedule();
+	sio = sio_last = NULL;
+	for (i = 0; i < num_regions; i++) {
+		sio = sub_io_create(io, GFP_NOIO);
+		if (!io->sub_ios)
+			io->sub_ios = sio;
+		if (sio_last)
+			sio_last->sio_next = sio;
+		sio_last = sio;
+		sio->state.region = i;
+		sub_io_submit(sio, &regions[i]);
+		sub_io_put(sio);
 	}
-	set_current_state(TASK_RUNNING);
 
-	if (atomic_read(&io.count))
-		return -EINTR;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dm_io_request_submit);
+
+unsigned long dm_io_request_wait_for_completion(
+	struct dm_io_request_new *io_req)
+{
+	struct io *io = (struct io *) io_req;
 
-	if (error_bits)
-		*error_bits = io.error;
+	while (atomic_read(&io->io_count) > 0 && !signal_pending(current))
+	io_schedule();
 
-	return io.error ? -EIO : 0;
+	return io_get_bits(io);
 }
+EXPORT_SYMBOL_GPL(dm_io_request_wait_for_completion);
 
-static int async_io(struct dm_io_client *client, unsigned int num_regions,
-		    struct io_region *where, int rw, struct dpages *dp,
-		    io_notify_fn fn, void *context)
+
+/*
+ * I/O routine that provides the old style call.
+ */
+static void compat_notify_fn(struct dm_io_notify_data *nd)
 {
-	struct io *io;
+	struct io *	io	= (struct io *) nd->context;
 
-	if (num_regions > 1 && rw != WRITE) {
-		WARN_ON(1);
-		fn(1, context);
-		return -EIO;
+	if (io && io->old_fn) {
+		io->old_fn(nd->error_bits, io->old_context);
+		io->req.notify.context = NULL;
+		io_put(io);
 	}
-
-	io = mempool_alloc(client->pool, GFP_NOIO);
-	io->error = 0;
-	atomic_set(&io->count, 1); /* see dispatch_io() */
-	io->sleeper = NULL;
-	io->client = client;
-	io->callback = fn;
-	io->context = context;
-
-	dispatch_io(rw, num_regions, where, dp, io, 0);
-	return 0;
 }
 
-static int dp_init(struct dm_io_request *io_req, struct dpages *dp)
+static int dp_init(struct dm_io_memory *mem, struct dpages *dp)
 {
 	/* Set up dpages based on memory type */
-	switch (io_req->mem.type) {
+	switch (mem->type) {
 	case DM_IO_PAGE_LIST:
-		list_dp_init(dp, io_req->mem.ptr.pl, io_req->mem.offset);
+		list_dp_init(dp, mem->ptr.pl, mem->offset);
 		break;
 
 	case DM_IO_BVEC:
-		bvec_dp_init(dp, io_req->mem.ptr.bvec);
+		bvec_dp_init(dp, mem->ptr.bvec);
 		break;
 
 	case DM_IO_VMA:
-		vm_dp_init(dp, io_req->mem.ptr.vma);
+		vm_dp_init(dp, mem->ptr.vma);
 		break;
 
 	case DM_IO_KMEM:
-		km_dp_init(dp, io_req->mem.ptr.addr);
+		km_dp_init(dp, mem->ptr.addr);
 		break;
 
 	default:
@@ -437,24 +662,43 @@ static int dp_init(struct dm_io_request 
 	return 0;
 }
 
-/*
- * New collapsed (a)synchronous interface
- */
 int dm_io(struct dm_io_request *io_req, unsigned num_regions,
 	  struct io_region *where, unsigned long *sync_error_bits)
 {
-	int r;
-	struct dpages dp;
+	struct dm_io_request_new *	req;
+	int				r;
+
+	if (!io_req->client)
+		return -EINVAL;
+
+	req = dm_io_request_create(io_req->client, GFP_NOIO);
+	if (!req)
+		return -ENOMEM;
+
+	req->rw = io_req->bi_rw;
+	req->mem = io_req->mem;
+
+	if (!io_req->notify.fn) {
+		req->rw |= BIO_RW_SYNC;
+	} else {
+		struct io *	io = (struct io *) req;
+
+		io->old_fn          = io_req->notify.fn;
+		io->old_context     = io_req->notify.context;
+		req->notify.type    = DM_IO_NOTIFY_REQUEST;
+		req->notify.fn      = compat_notify_fn;
+		req->notify.context = io_get(io);
+	}
+
+	r = dm_io_request_submit(req, num_regions, where);
+
+	if (!io_req->notify.fn && !r) {
+		*sync_error_bits = dm_io_request_wait_for_completion(req);
+		r = *sync_error_bits ? -EIO : 0;
+	}
 
-	r = dp_init(io_req, &dp);
-	if (r)
-		return r;
-
-	if (!io_req->notify.fn)
-		return sync_io(io_req->client, num_regions, where,
-			       io_req->bi_rw, &dp, sync_error_bits);
+	dm_io_request_put(req);
 
-	return async_io(io_req->client, num_regions, where, io_req->bi_rw,
-			&dp, io_req->notify.fn, io_req->notify.context);
+	return r;
 }
 EXPORT_SYMBOL(dm_io);
Index: linux-2.6.22-rc2/drivers/md/dm-io.h
===================================================================
--- linux-2.6.22-rc2.orig/drivers/md/dm-io.h
+++ linux-2.6.22-rc2/drivers/md/dm-io.h
@@ -9,19 +9,62 @@
 
 #include "dm.h"
 
-struct io_region {
+struct dm_io_region {
 	struct block_device *bdev;
 	sector_t sector;
 	sector_t count;		/* If this is zero the region is ignored. */
 };
 
+/* Will go */
+#define io_region dm_io_region
+
 struct page_list {
 	struct page_list *next;
 	struct page *page;
 };
 
+/*
+ * The notification function is called whenever one request completes and/or
+ * if one of the regions of a request changes its state. Currently this is
+ * either successful completion or failure.
+ * Access to this structure is only valid from within a notification
+ * function.
+ */
+enum dm_io_notification_type {
+	DM_IO_NOTIFY_REQUEST = 1,	/* Notification on request level */
+	DM_IO_NOTIFY_REGION  = 2,	/* Notification on region level  */
+};
+
+struct dm_io_region_state {
+	unsigned		region;		/* Index number of region */
+	enum {
+		DM_IO_PENDING,
+		DM_IO_RUNNING,
+		DM_IO_OK,
+		DM_IO_ERROR,
+	}			status;
+	int			error_code;
+};
+
+struct dm_io_notify_data {
+	enum dm_io_notification_type				type;
+	union {
+		struct dm_io_region_state	state;
+		unsigned long			error_bits;
+	};
+	void *							context;
+};
+typedef void (*dm_io_notify_fn)(struct dm_io_notify_data *data);
+
+/* Will go */
 typedef void (*io_notify_fn)(unsigned long error, void *context);
 
+/*
+ * IO request structure to pass in arguments to dm_io()
+ * The rw argument can be READ or WRITE plus any bio flags (for example
+ * BIO_RW_SYNC which is required if the block layer should unplug queues
+ * immediately).
+ */
 enum dm_io_mem_type {
 	DM_IO_PAGE_LIST,/* Page list */
 	DM_IO_BVEC,	/* Bio vector */
@@ -43,37 +86,73 @@ struct dm_io_memory {
 };
 
 struct dm_io_notify {
-	io_notify_fn fn;	/* Callback for asynchronous requests */
-	void *context;		/* Passed to callback */
+	io_notify_fn			fn;	 /* Callback for async req */
+	void *				context; /* Passed to callback     */
+};
+struct dm_io_notify_new {
+	enum dm_io_notification_type	type;	 /* Requested notification */
+	dm_io_notify_fn			fn;	 /* Callback for async req */
+	void *				context; /* Passed to callback     */
 };
 
-/*
- * IO request structure
- */
 struct dm_io_client;
 struct dm_io_request {
-	int bi_rw;			/* READ|WRITE - not READA */
-	struct dm_io_memory mem;	/* Memory to use for io */
-	struct dm_io_notify notify;	/* Synchronous if notify.fn is NULL */
-	struct dm_io_client *client;	/* Client memory handler */
+	int			bi_rw;	/* READ|WRITE + bio flags */
+	struct dm_io_memory	mem;	/* Memory to use for io */
+	struct dm_io_notify	notify;	/* Synchronous if notify.fn is NULL */
+	struct dm_io_client *	client;	/* Client memory handler */
+};
+struct dm_io_request_new {
+	int			rw;		/* READ|WRITE + bio flags */
+	struct dm_io_memory	mem;		/* Memory to use for io   */
+	struct dm_io_notify_new	notify;		/* Notification handler   */
+	unsigned		flags;		/* Future use...          */
 };
 
+
 /*
- * For async io calls, users can alternatively use the dm_io() function below
- * and dm_io_client_create() to create private mempools for the client.
- *
- * Create/destroy may block.
+ * For async io calls, use dm_io_client_create() to create
+ * private mempools for the client.  It returns a client handle
+ * to pass into the functions below.
+ * The caller will specify how many requests the client must be able to
+ * handle in low-mem situations and how many regions will be used
+ * altogether. This can be changed by the resize call.
  */
-struct dm_io_client *dm_io_client_create(unsigned num_pages);
-int dm_io_client_resize(unsigned num_pages, struct dm_io_client *client);
+struct dm_io_client *dm_io_client_create_new(unsigned min_ios,
+	unsigned max_regions);
+int dm_io_client_resize_new(struct dm_io_client *client, unsigned min_ios,
+	unsigned max_regions);
 void dm_io_client_destroy(struct dm_io_client *client);
 
+/* Old client functions */
+struct dm_io_client * __deprecated dm_io_client_create(unsigned num_pages);
+int __deprecated dm_io_client_resize(unsigned num_pages,
+	struct dm_io_client *client);
+
 /*
- * IO interface using private per-client pools.
- * Each bit in the optional 'sync_error_bits' bitset indicates whether an
- * error occurred doing io to the corresponding region.
+ * To start I/O the caller must first create a new request structure by
+ * calling dm_io_request_create(). Then I/O can be started with
+ * dm_io_request_submit().
+ * If the caller doesn't need the request anymore (for waiting) it has to
+ * release the request by calling dm_io_request_put().
+ *
+ * Notes: 1. If a request should be synchronous the call has to:
+ *           a) add the BIO_RW_SYNC flag to io_req->rw
+ *           b) wait until the request has been completed
+ *        2. The calls to dm_io_request_create and dm_io_request_submit
+ *           might block.
  */
-int dm_io(struct dm_io_request *io_req, unsigned num_regions,
-	  struct io_region *region, unsigned long *sync_error_bits);
+struct dm_io_request_new *dm_io_request_create(struct dm_io_client *client,
+	gfp_t gfp_mask);
+
+int dm_io_request_submit(struct dm_io_request_new *io_req,
+	unsigned num_regions, struct dm_io_region *regions);
+unsigned long dm_io_request_wait_for_completion(
+	struct dm_io_request_new *io_req);
+
+struct dm_io_request_new *dm_io_request_get(struct dm_io_request_new *io_req);
+void dm_io_request_put(struct dm_io_request_new *io_req);
+
+/* Old I/O interface */
+int __deprecated dm_io(struct dm_io_request *io_req, unsigned num_regions,
+	struct dm_io_region *region, unsigned long *sync_error_bits);
 
 #endif

[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]