[Date Prev][Date Next] [Thread Prev][Thread Next]
[Thread Index]
[Date Index]
[Author Index]
[dm-devel] [PATCH][RFC] supporting different failover models
- From: Mike Christie <michaelc cs wisc edu>
- To: thornber redhat com
- Cc: dm-devel redhat com
- Subject: [dm-devel] [PATCH][RFC] supporting different failover models
- Date: Wed Feb 11 17:06:07 2004
Hey Joe,
A good use of the Priority Group framework is to place your primary
paths in one group and your secondary, tertiary, etc paths in a higher
groups. The problem is that some devices will require a special command
be sent before the other paths can be used. And another class of devices
will simply perform the switch when IO is sent down the secondary paths.
For the latter group of devices the test IOs are going to cause switches
which is going to cause performance problems. By only testing the
current group or specifying which groups should be tested and just
seperating your paths through priority groups, we can avoid that
problem, you do not have to worry about revalidating secondary paths
that were previously initially failed, plus I think you can then kill
the nr_tested_paths code.
All that is needed to support the devices that require a special command
are some callouts in the priority group framework to initialize the
group before it is used. If this is done from the daemon we can sleep
and wait for the result, and you have already provided a way to queue
incoming IO from dm. Would it be ok to call this from a target? In
general is it going to be better to throttle incoming IO while failed
bios are resubmitted?
I have attached a patch (built against 2.6.2-udm1) that begins to
implement this. It does not provide the priority group callouts and it
is untested and really only meant to show what I mean. Is this direction
OK, or what is the intended purpose of the test IO+nr_test_paths code?
I also replaced the dm-daemon usage for a workqueue. It will allow us to
do requeueing for different devices in parallel, and it attempts to
execute the work on the same processor as it was queued.
Mike
--- linux-2.6.2/drivers/md/dm-mpath.c 2004-02-10 17:33:58.000000000 -0800
+++ linux-2.6.2-work/drivers/md/dm-mpath.c 2004-02-10 23:24:14.000000000 -0800
@@ -16,6 +16,8 @@
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/time.h>
+#include <linux/workqueue.h>
+#include <linux/timer.h>
#include <asm/atomic.h>
#define MPATH_FAIL_COUNT 1
@@ -62,17 +64,18 @@ struct multipath {
struct list_head priority_groups;
spinlock_t lock;
- unsigned nr_valid_paths;
- unsigned nr_tested_paths;
struct path *current_path;
unsigned current_count;
unsigned min_io;
+ struct work_struct dispatch_failed;
struct bio_list failed_ios;
- struct bio_list test_ios;
+ struct work_struct test_pgroup;
+ struct timer_list test_timer;
unsigned test_interval;
+
unsigned trigger_event;
};
@@ -159,6 +162,9 @@ static void free_priority_group(struct p
kfree(pg);
}
+static void dispatch_failed_ios(void *data);
+static void test_pgroup(void *data);
+
static struct multipath *alloc_multipath(void)
{
struct multipath *m;
@@ -169,6 +175,9 @@ static struct multipath *alloc_multipath
INIT_LIST_HEAD(&m->priority_groups);
m->lock = SPIN_LOCK_UNLOCKED;
m->min_io = MPATH_MIN_IO;
+ INIT_WORK(&m->dispatch_failed, dispatch_failed_ios, m);
+ INIT_WORK(&m->test_pgroup, test_pgroup, m);
+ init_timer(&m->test_timer);
}
return m;
@@ -187,7 +196,7 @@ static void free_multipath(struct multip
}
/*-----------------------------------------------------------------
- * All paths should be tested periodically.
+ * All valid paths should be tested periodically.
*---------------------------------------------------------------*/
static void iterate_paths(struct multipath *m, void (*fn)(struct path *p))
{
@@ -200,15 +209,9 @@ static void iterate_paths(struct multipa
}
}
-static void clear_tested(struct path *p)
-{
- p->tested = 0;
-}
-
static void fail_path(struct path *path)
{
unsigned long flags;
- struct multipath *m;
spin_lock_irqsave(&path->failed_lock, flags);
@@ -218,24 +221,6 @@ static void fail_path(struct path *path)
path->fail_total++;
path->pg->ps->type->set_path_state(path->pg->ps, path, 0);
path->pg->m->trigger_event = 1;
-
- m = path->pg->m;
- spin_lock(&m->lock);
- m->nr_valid_paths--;
- if (!m->nr_valid_paths) {
- iterate_paths(m, clear_tested);
- m->nr_tested_paths = 0;
- }
- spin_unlock(&m->lock);
- }
-
- if (!path->tested) {
- path->tested = 1;
-
- m = path->pg->m;
- spin_lock(&m->lock);
- m->nr_tested_paths++;
- spin_unlock(&m->lock);
}
spin_unlock_irqrestore(&path->failed_lock, flags);
@@ -253,10 +238,6 @@ static void recover_path(struct path *pa
path->fail_count = MPATH_FAIL_COUNT;
path->pg->ps->type->set_path_state(path->pg->ps, path, 1);
m->trigger_event = 1;
-
- spin_lock(&m->lock);
- m->nr_valid_paths++;
- spin_unlock(&m->lock);
}
spin_unlock_irqrestore(&path->failed_lock, flags);
@@ -278,40 +259,61 @@ static int test_endio(struct bio *bio, u
return 0;
}
-static void test_path(struct path *p)
+static void test_pgroup(void *data)
{
- /*
- * If we get this lock we're allowed to issue a test io
- * for this path.
- */
- if (down_trylock(&p->test_lock))
- return;
+ struct multipath *m = (struct multipath *)data;
+ struct priority_group *pg;
+ struct path *p;
+ unsigned long flags;
- p->test_bio->bi_sector = 0;
- p->test_bio->bi_bdev = p->dev->bdev;
- p->test_bio->bi_size = bdev_hardsect_size(p->dev->bdev);
- p->test_bio->bi_idx = 0;
+ spin_lock_irqsave(&m->lock, flags);
+ p = m->current_path;
+ spin_unlock_irqrestore(&m->lock, flags);
- bio_list_add(&p->pg->m->test_ios, p->test_bio);
+ if (!p)
+ return;
+
+ pg = p->pg;
+ list_for_each_entry (p, &pg->paths, list) {
+ /* TODO - add test for path idleness */
+
+ /*
+ * If we get this lock we're allowed to issue a test io
+ * for this path.
+ */
+ if (down_trylock(&p->test_lock))
+ continue;
+
+ p->test_bio->bi_sector = 0;
+ p->test_bio->bi_bdev = p->dev->bdev;
+ p->test_bio->bi_size = bdev_hardsect_size(p->dev->bdev);
+ p->test_bio->bi_idx = 0;
+
+ generic_make_request(p->test_bio);
+ }
}
/*-----------------------------------------------------------------
- * The multipath daemon is responsible for resubmitting failed ios.
+ * The multipath daemon is responsible for submitting failed and
+ * path test ios.
*---------------------------------------------------------------*/
-static struct dm_daemon _kmpathd;
+static struct workqueue_struct *_kmpathd_wq;
-static LIST_HEAD(_mpaths);
-static spinlock_t _mpath_lock = SPIN_LOCK_UNLOCKED;
+static void add_test_timer(struct multipath *m);
-static void submit_ios(struct bio *bio)
+static void queue_test_ios(struct multipath *m)
{
- struct bio *next;
+ queue_work(_kmpathd_wq, &m->test_pgroup);
+ add_test_timer(m);
+}
- while (bio) {
- next = bio->bi_next;
- bio->bi_next = NULL;
- generic_make_request(bio);
- bio = next;
+static void add_test_timer(struct multipath *m)
+{
+ if (m->test_interval) {
+ m->test_timer.function = (void (*)(unsigned long))queue_test_ios;
+ m->test_timer.data = (unsigned long)m;
+ m->test_timer.expires = m->test_interval;
+ add_timer(&m->test_timer);
}
}
@@ -320,13 +322,11 @@ static int __choose_path(struct multipat
struct priority_group *pg;
struct path *path = NULL;
- if (m->nr_valid_paths) {
- /* loop through the priority groups until we find a valid path. */
- list_for_each_entry (pg, &m->priority_groups, list) {
- path = pg->ps->type->select_path(pg->ps);
- if (path)
- break;
- }
+ /* loop through the priority groups until we find a valid path. */
+ list_for_each_entry (pg, &m->priority_groups, list) {
+ path = pg->ps->type->select_path(pg->ps);
+ if (path)
+ break;
}
m->current_path = path;
@@ -364,68 +364,31 @@ static int map_io(struct multipath *m, s
return 0;
}
-static void dispatch_failed_ios(struct multipath *m)
+static void dispatch_failed_ios(void *data)
{
+ struct multipath *m = (struct multipath *)data;
int r;
unsigned long flags;
struct bio *bio = NULL, *next;
spin_lock_irqsave(&m->lock, flags);
- if (m->nr_valid_paths || (m->nr_tested_paths == m->nr_paths))
- bio = bio_list_get(&m->failed_ios);
+ bio = bio_list_get(&m->failed_ios);
spin_unlock_irqrestore(&m->lock, flags);
-
while (bio) {
next = bio->bi_next;
bio->bi_next = NULL;
r = map_io(m, bio);
if (r)
- /*
- * This wont loop forever because the
- * end_io function will fail the ios if
- * we've tested all the paths.
- */
bio_io_error(bio, bio->bi_size);
-
else
generic_make_request(bio);
bio = next;
}
-}
-
-/*
- * Multipathd does this every time it runs, returning a sleep
- * duration hint.
- */
-static jiffy_t do_work(void)
-{
- unsigned long flags;
- struct multipath *m;
- unsigned interval = 0;
-
- spin_lock(&_mpath_lock);
- list_for_each_entry (m, &_mpaths, list) {
- dispatch_failed_ios(m);
- iterate_paths(m, test_path);
- submit_ios(bio_list_get(&m->test_ios));
-
- spin_lock_irqsave(&m->lock, flags);
- if (m->trigger_event) {
- dm_table_event(m->ti->table);
- m->trigger_event = 0;
- }
- spin_unlock_irqrestore(&m->lock, flags);
-
- interval = min_not_zero(interval, m->test_interval);
- }
- spin_unlock(&_mpath_lock);
blk_run_queues();
-
- return ((jiffy_t) interval) * HZ;
}
/*-----------------------------------------------------------------
@@ -681,15 +644,11 @@ static int multipath_ctr(struct dm_targe
__insert_priority_group(m, pg);
}
}
- m->nr_valid_paths = m->nr_paths;
+ add_test_timer(m);
ti->private = m;
m->ti = ti;
- spin_lock(&_mpath_lock);
- list_add(&m->list, &_mpaths);
- spin_unlock(&_mpath_lock);
-
return 0;
bad:
@@ -700,11 +659,6 @@ static int multipath_ctr(struct dm_targe
static void multipath_dtr(struct dm_target *ti)
{
struct multipath *m = (struct multipath *) ti->private;
-
- spin_lock(&_mpath_lock);
- list_del(&m->list);
- spin_unlock(&_mpath_lock);
-
free_multipath(m);
}
@@ -745,13 +699,6 @@ static int multipath_end_io(struct dm_ta
struct multipath *m = (struct multipath *) ti->private;
if (error) {
- spin_lock(&m->lock);
- if (!m->nr_valid_paths && (m->nr_tested_paths == m->nr_paths)) {
- spin_unlock(&m->lock);
- return -EIO;
- }
- spin_unlock(&m->lock);
-
path = find_path(m, bio->bi_bdev);
fail_path(path);
@@ -760,7 +707,7 @@ static int multipath_end_io(struct dm_ta
bio_list_add(&m->failed_ios, bio);
spin_unlock(&m->lock);
- dm_daemon_wake(&_kmpathd);
+ queue_work(_kmpathd_wq, &m->dispatch_failed);
return 1; /* io not complete */
}
@@ -887,8 +834,8 @@ int __init dm_multipath_init(void)
return r;
}
- r = dm_daemon_start(&_kmpathd, "kpathd", do_work);
- if (r) {
+ _kmpathd_wq = create_workqueue("kmpathd");
+ if (!_kmpathd_wq) {
/* FIXME: remove this */
dm_unregister_path_selectors();
dm_unregister_target(&multipath_target);
@@ -902,7 +849,7 @@ void __exit dm_multipath_exit(void)
{
int r;
- dm_daemon_stop(&_kmpathd);
+ destroy_workqueue(_kmpathd_wq);
dm_unregister_path_selectors();
r = dm_unregister_target(&multipath_target);
if (r < 0)
[Date Prev][Date Next] [Thread Prev][Thread Next]
[Thread Index]
[Date Index]
[Author Index]