[dm-devel] [RFC PATCH v3] dm mpath: add a queue_if_no_path timeout

Frank Mayhar fmayhar at google.com
Tue Nov 5 16:02:12 UTC 2013


This is the patch submitted by Jun'ichi Nomura, originally based on
Mike's patch with some small changes by me.  Jun'ichi's description
follows, along with my changes:

On Tue, 2013-11-05 at 07:18 -0800, Frank Mayhar wrote:
> On Fri, 2013-11-01 at 04:17 +0000, Junichi Nomura wrote:
> > I slightly modified the patch:
> >   - fixed the timeout handler to correctly find
> >     clone request and "struct multipath"
> >   - the timeout handler just disables "queue_if_no_path"
> >     instead of killing the request directly
> >   - "dmsetup status" to show the parameter
> >   - changed an interface between dm core and target
> >   - added some debugging printk (you can remove them)
> > and checked the timeout occurs, at least.
> > 
> > I'm not sure if this feature is good or not though.
> > (The timer behavior is not intuitive, I think)
> Thanks!  I integrated your new patch and tested it.  Sure enough, it
> seems to work.  I've made a few tweaks (added a module tunable and
> support for setting the timer in multipath_message(), removed your debug
> printks) and will submit the modified patch for discussion shortly.

Comments?

---
 block/blk-timeout.c           |    2 
 drivers/md/dm-mpath.c         |   85 ++++++++++++++++++++++++++++++++++++++++++
 drivers/md/dm-table.c         |   10 ++++
 drivers/md/dm.c               |   22 ++++++++++
 include/linux/device-mapper.h |    4 +
 5 files changed, 122 insertions(+), 1 deletion(-)

diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 65f1035..7ea06b0 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -170,7 +170,7 @@ void blk_add_timer(struct request *req)
 	struct request_queue *q = req->q;
 	unsigned long expiry;
 
-	if (!q->rq_timed_out_fn)
+	if (!q->rq_timed_out_fn || !q->rq_timeout)
 		return;
 
 	BUG_ON(!list_empty(&req->timeout_list));
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index de570a5..6f127b7 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -27,6 +27,8 @@
 #define DM_PG_INIT_DELAY_MSECS 2000
 #define DM_PG_INIT_DELAY_DEFAULT ((unsigned) -1)
 
+#define DEFAULT_NO_PATH_TIMEOUT_SECS 0
+
 /* Path properties */
 struct pgpath {
 	struct list_head list;
@@ -105,6 +107,8 @@ struct multipath {
 	mempool_t *mpio_pool;
 
 	struct mutex work_mutex;
+
+	unsigned no_path_timeout;
 };
 
 /*
@@ -117,6 +121,10 @@ struct dm_mpath_io {
 
 typedef int (*action_fn) (struct pgpath *pgpath);
 
+static unsigned long global_no_path_timeout = DEFAULT_NO_PATH_TIMEOUT_SECS;
+module_param_named(queue_if_no_path_timeout_secs,
+		   global_no_path_timeout, ulong, S_IRUGO | S_IWUSR);
+
 static struct kmem_cache *_mpio_cache;
 
 static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
@@ -207,6 +215,7 @@ static struct multipath *alloc_multipath(struct dm_target *ti)
 			kfree(m);
 			return NULL;
 		}
+		m->no_path_timeout = global_no_path_timeout;
 		m->ti = ti;
 		ti->private = m;
 	}
@@ -444,6 +453,45 @@ static int queue_if_no_path(struct multipath *m, unsigned queue_if_no_path,
 	return 0;
 }
 
+/*
+ * Block timeout callback, called from the block layer
+ *
+ * request_queue lock is held on entry.
+ *
+ * Return values:
+ * BLK_EH_RESET_TIMER if the request should be left running
+ * BLK_EH_NOT_HANDLED if the request is handled or terminated
+ *                    by the driver.
+ */
+enum blk_eh_timer_return multipath_timed_out(struct dm_target *ti,
+					     struct request *clone)
+{
+	unsigned long flags;
+	struct multipath *m = ti->private;
+	int rc = BLK_EH_RESET_TIMER;
+	int flush_ios = 0;
+
+	/*
+	 * Only abort request if:
+	 * - queued_ios is not empty
+	 *   (protect against races with process_queued_ios)
+	 * - no valid paths are found
+	 */
+	spin_lock_irqsave(&m->lock, flags);
+	if (m->no_path_timeout &&
+	    !list_empty(&m->queued_ios) && !m->nr_valid_paths) {
+		flush_ios = 1;
+	}
+	spin_unlock_irqrestore(&m->lock, flags);
+
+	if (flush_ios) {
+		queue_if_no_path(m, 0, 0);
+		queue_work(kmultipathd, &m->process_queued_ios);
+	}
+
+	return rc;
+}
+
 /*-----------------------------------------------------------------
  * The multipath daemon is responsible for resubmitting queued ios.
  *---------------------------------------------------------------*/
@@ -790,6 +838,7 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m)
 		{0, 6, "invalid number of feature args"},
 		{1, 50, "pg_init_retries must be between 1 and 50"},
 		{0, 60000, "pg_init_delay_msecs must be between 0 and 60000"},
+		{0, 65535, "queue_if_no_path_timeout_secs must be between 0 and 65535"},
 	};
 
 	r = dm_read_arg_group(_args, as, &argc, &ti->error);
@@ -827,6 +876,13 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m)
 			continue;
 		}
 
+		if (!strcasecmp(arg_name, "queue_if_no_path_timeout_secs") &&
+		    (argc >= 1)) {
+			r = dm_read_arg(_args + 3, as, &m->no_path_timeout, &ti->error);
+			argc--;
+			continue;
+		}
+
 		ti->error = "Unrecognised multipath feature request";
 		r = -EINVAL;
 	} while (argc && !r);
@@ -978,6 +1034,27 @@ static int multipath_map(struct dm_target *ti, struct request *clone,
 }
 
 /*
+ * Set the no_path_timeout on behalf of a message.  Note that this only
+ * affects new requests; already-queued requests are unaffected.
+ */
+static int set_no_path_timeout(struct multipath *m, const char *timestr)
+{
+	unsigned long to;
+	unsigned long flags;
+
+	if (!timestr || (sscanf(timestr, "%lu", &to) != 1)) {
+		DMWARN("invalid timeout supplied to set_no_path_timeout");
+		return -EINVAL;
+	}
+
+	spin_lock_irqsave(&m->lock, flags);
+	m->no_path_timeout = to;
+	dm_set_timeout(dm_table_get_md(m->ti->table), m->no_path_timeout);
+	spin_unlock_irqrestore(&m->lock, flags);
+	return 0;
+}
+
+/*
  * Take a path out of use.
  */
 static int fail_path(struct pgpath *pgpath)
@@ -1384,6 +1461,7 @@ static void multipath_resume(struct dm_target *ti)
 
 	spin_lock_irqsave(&m->lock, flags);
 	m->queue_if_no_path = m->saved_queue_if_no_path;
+	dm_set_timeout(dm_table_get_md(m->ti->table), m->no_path_timeout);
 	spin_unlock_irqrestore(&m->lock, flags);
 }
 
@@ -1421,11 +1499,14 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
 		DMEMIT("2 %u %u ", m->queue_size, m->pg_init_count);
 	else {
 		DMEMIT("%u ", m->queue_if_no_path +
+				  (m->no_path_timeout > 0) * 2 +
 			      (m->pg_init_retries > 0) * 2 +
 			      (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) * 2 +
 			      m->retain_attached_hw_handler);
 		if (m->queue_if_no_path)
 			DMEMIT("queue_if_no_path ");
+		if (m->no_path_timeout)
+			DMEMIT("queue_if_no_path_timeout_secs %u ", m->no_path_timeout);
 		if (m->pg_init_retries)
 			DMEMIT("pg_init_retries %u ", m->pg_init_retries);
 		if (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT)
@@ -1550,6 +1631,9 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
 	} else if (!strcasecmp(argv[0], "switch_group")) {
 		r = switch_pg_num(m, argv[1]);
 		goto out;
+	} else if (!strcasecmp(argv[0], "queue_if_no_path_timeout_secs")) {
+		r = set_no_path_timeout(m, argv[1]);
+		goto out;
 	} else if (!strcasecmp(argv[0], "reinstate_path"))
 		action = reinstate_path;
 	else if (!strcasecmp(argv[0], "fail_path"))
@@ -1728,6 +1812,7 @@ static struct target_type multipath_target = {
 	.ioctl  = multipath_ioctl,
 	.iterate_devices = multipath_iterate_devices,
 	.busy = multipath_busy,
+	.timed_out = multipath_timed_out,
 };
 
 static int __init dm_multipath_init(void)
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 8f87835..32c5399 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -26,6 +26,8 @@
 #define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t))
 #define CHILDREN_PER_NODE (KEYS_PER_NODE + 1)
 
+enum blk_eh_timer_return dm_rq_timed_out(struct request *rq);
+
 struct dm_table {
 	struct mapped_device *md;
 	unsigned type;
@@ -1519,6 +1521,7 @@ static void suspend_targets(struct dm_table *t, unsigned postsuspend)
 
 		ti++;
 	}
+	blk_queue_rq_timed_out(dm_disk(t->md)->queue, NULL);
 }
 
 void dm_table_presuspend_targets(struct dm_table *t)
@@ -1540,10 +1543,14 @@ void dm_table_postsuspend_targets(struct dm_table *t)
 int dm_table_resume_targets(struct dm_table *t)
 {
 	int i, r = 0;
+	int has_timeout = 0;
 
 	for (i = 0; i < t->num_targets; i++) {
 		struct dm_target *ti = t->targets + i;
 
+		if (ti->type->timed_out)
+			has_timeout = 1;
+
 		if (!ti->type->preresume)
 			continue;
 
@@ -1552,6 +1559,9 @@ int dm_table_resume_targets(struct dm_table *t)
 			return r;
 	}
 
+	if (has_timeout)
+		blk_queue_rq_timed_out(dm_disk(t->md)->queue, dm_rq_timed_out);
+
 	for (i = 0; i < t->num_targets; i++) {
 		struct dm_target *ti = t->targets + i;
 
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index b3e26c7..bb7a29c 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1026,6 +1026,13 @@ static void end_clone_request(struct request *clone, int error)
 	dm_complete_request(clone, error);
 }
 
+int dm_set_timeout(struct mapped_device *md, unsigned int tmo)
+{
+	blk_queue_rq_timeout(md->queue, tmo * HZ);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dm_set_timeout);
+
 /*
  * Return maximum size of I/O possible at the supplied sector up to the current
  * target boundary.
@@ -1829,6 +1836,21 @@ out:
 	dm_put_live_table(md, srcu_idx);
 }
 
+enum blk_eh_timer_return dm_rq_timed_out(struct request *rq)
+{
+	struct request *clone = rq->special;
+	struct dm_rq_target_io *tio = clone->end_io_data;
+
+	if (!tio)
+		goto out; /* not mapped */
+
+	if (tio->ti->type->timed_out)
+		return tio->ti->type->timed_out(tio->ti, clone);
+
+out:
+	return BLK_EH_RESET_TIMER;
+}
+
 int dm_underlying_device_busy(struct request_queue *q)
 {
 	return blk_lld_busy(q);
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index ed419c6..84c0368 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -107,6 +107,8 @@ typedef int (*dm_iterate_devices_fn) (struct dm_target *ti,
 typedef void (*dm_io_hints_fn) (struct dm_target *ti,
 				struct queue_limits *limits);
 
+typedef enum blk_eh_timer_return (*dm_rq_timed_out_fn) (struct dm_target *ti,
+							struct request *clone);
 /*
  * Returns:
  *    0: The target can handle the next I/O immediately.
@@ -162,6 +164,7 @@ struct target_type {
 	dm_busy_fn busy;
 	dm_iterate_devices_fn iterate_devices;
 	dm_io_hints_fn io_hints;
+	dm_rq_timed_out_fn timed_out;
 
 	/* For internal device-mapper use. */
 	struct list_head list;
@@ -604,5 +607,6 @@ void dm_dispatch_request(struct request *rq);
 void dm_requeue_unmapped_request(struct request *rq);
 void dm_kill_unmapped_request(struct request *rq, int error);
 int dm_underlying_device_busy(struct request_queue *q);
+int dm_set_timeout(struct mapped_device *md, unsigned int tmo);
 
 #endif	/* _LINUX_DEVICE_MAPPER_H */

-- 
Frank Mayhar
310-460-4042




More information about the dm-devel mailing list