[dm-devel] [PATCH] 1/1: dm-mpath: Use one list of paths in the priority_group

Kevin Corry kevcorry at us.ibm.com
Fri Jan 23 11:35:02 UTC 2004


Here's the patch to combine the valid_paths and invalid_paths lists in the
priority_group into a singe list. It really pains me to have to send in a
patch this huge, but I really couldn't find a meaningful way to break it up.

-- 
Kevin Corry
kevcorry at us.ibm.com
http://evms.sourceforge.net/


In the priority_group struct, combine the valid_paths and invalid_paths lists
into a single paths list. This is done to eliminate a dead-lock condition
that will occur if a multipath device is suspended while one of its paths
is in the middle of being tested.

This has quite a rippling effect on the data structures and locking throughout
dm-mpath.c. In the path struct, we introduce a failed_lock spinlock to protect
has_failed, fail_count, and fail_total. In the multipath struct, the path_lock
spinlock is removed and replaced with a current_lock spinlock to protect the
current_path pointer and count of I/Os to the current_path. With this new
locking, some of the atomc_t's can become simple unsigned int's. The code
changes look very substantial, but it all relates to the elimination of the
path_lock and the addition of the failed_lock and current_lock.

Also in the multipath struct, we remove the test_ios bio list. This was
introduced a few revisions ago because we didn't want to submit the test
bio's while holding the path_lock. Now that the path_lock is gone, we can
go back to submitting the test bio's in test_path() instead of queueing them
to be submitted later in do_work().


--- diff/drivers/md/dm-mpath.c	2004-01-23 09:55:27.000000000 -0600
+++ source/drivers/md/dm-mpath.c	2004-01-23 10:10:55.000000000 -0600
@@ -28,9 +28,10 @@
 	struct dm_dev *dev;
 	struct priority_group *pg;
 
+	spinlock_t failed_lock;
 	int has_failed;
-	atomic_t fail_count;
-	atomic_t fail_total;
+	unsigned fail_count;
+	unsigned fail_total;
 
 	struct semaphore test_lock;
 	struct bio *test_bio;
@@ -43,8 +44,7 @@
 	unsigned priority;
 	struct multipath *m;
 	struct path_selector *ps;
-	struct list_head valid_paths;
-	struct list_head invalid_paths;
+	struct list_head paths;
 };
 
 /* Multipath context */
@@ -52,15 +52,15 @@
 	struct list_head list;
 	struct dm_target *ti;
 
-	spinlock_t path_lock;
 	struct list_head priority_groups;
+
+	spinlock_t current_lock;
 	struct path *current_path;
-	atomic_t count;
+	unsigned current_count;
 	unsigned min_io;
 
 	spinlock_t failed_lock;
 	struct bio_list failed_ios;
-	struct bio_list test_ios;
 
 	unsigned test_interval;
 	atomic_t trigger_event;
@@ -72,8 +72,8 @@
 
 	if (path) {
 		memset(path, 0, sizeof(*path));
-		atomic_set(&path->fail_count, MPATH_FAIL_COUNT);
-		atomic_set(&path->fail_total, 0);
+		path->failed_lock = SPIN_LOCK_UNLOCKED;
+		path->fail_count = MPATH_FAIL_COUNT;
 		init_MUTEX_LOCKED(&path->test_lock); /* resume will unlock */
 
 		path->test_bio = bio_alloc(GFP_KERNEL, 1);
@@ -116,8 +116,7 @@
 	}
 	memset(pg->ps, 0, sizeof(*pg->ps));
 
-	INIT_LIST_HEAD(&pg->valid_paths);
-	INIT_LIST_HEAD(&pg->invalid_paths);
+	INIT_LIST_HEAD(&pg->paths);
 
 	return pg;
 }
@@ -146,8 +145,7 @@
 		kfree(ps);
 	}
 
-	free_paths(&pg->valid_paths, ti);
-	free_paths(&pg->invalid_paths, ti);
+	free_paths(&pg->paths, ti);
 	kfree(pg);
 }
 
@@ -158,8 +156,8 @@
 	m = kmalloc(sizeof(*m), GFP_KERNEL);
 	if (m) {
 		memset(m, 0, sizeof(*m));
-		m->path_lock = SPIN_LOCK_UNLOCKED;
 		INIT_LIST_HEAD(&m->priority_groups);
+		m->current_lock = SPIN_LOCK_UNLOCKED;
 		m->failed_lock = SPIN_LOCK_UNLOCKED;
 		m->min_io = MPATH_MIN_IO;
 	}
@@ -182,48 +180,51 @@
 /*-----------------------------------------------------------------
  * All paths should be tested periodically.
  *---------------------------------------------------------------*/
-static void __fail_path(struct path *path)
+static void fail_path(struct path *path)
 {
-	if (path->has_failed)
-		return;
+	unsigned long flags;
 
-	/* FIXME: this is brain dead */
-	if (!atomic_dec_and_test(&path->fail_count))
-		return;
+	spin_lock_irqsave(&path->failed_lock, flags);
 
-	path->has_failed = 1;
-//	path->fail_time = jiffies;
-	atomic_inc(&path->fail_total);
-	list_move(&path->list, &path->pg->invalid_paths);
-	path->pg->ps->type->set_path_state(path->pg->ps, path, 0);
+	if (!path->has_failed) {
+		/* FIXME: this is brain dead */
+		if (--path->fail_count == 0) {
+			path->has_failed = 1;
+			path->fail_total++;
+			path->pg->ps->type->set_path_state(path->pg->ps,
+							   path, 0);
+		}
+	}
+
+	spin_unlock_irqrestore(&path->failed_lock, flags);
 }
 
-static void __recover_path(struct path *path)
+static void recover_path(struct path *path)
 {
-	if (!path->has_failed)
-		return;
+	unsigned long flags;
+
+	spin_lock_irqsave(&path->failed_lock, flags);
+
+	if (path->has_failed) {
+		path->has_failed = 0;
+		path->fail_count = MPATH_FAIL_COUNT;
+		path->pg->ps->type->set_path_state(path->pg->ps, path, 1);
+	}
 
-	atomic_set(&path->fail_count, MPATH_FAIL_COUNT);
-	list_move(&path->list, &path->pg->valid_paths);
-	path->pg->ps->type->set_path_state(path->pg->ps, path, 1);
-	path->has_failed = 0;
+	spin_unlock_irqrestore(&path->failed_lock, flags);
 }
 
 static int test_endio(struct bio *bio, unsigned int done, int error)
 {
 	struct path *path = (struct path *) bio->bi_private;
-	struct multipath *m = path->pg->m;
-	unsigned long flags;
 
 	if (bio->bi_size)
 		return 1;
 
-	spin_lock_irqsave(&m->path_lock, flags);
 	if (error)
-		__fail_path(path);
+		fail_path(path);
 	else
-		__recover_path(path);
-	spin_unlock_irqrestore(&m->path_lock, flags);
+		recover_path(path);
 
 	up(&path->test_lock);
 	return 0;
@@ -232,14 +233,15 @@
 static void test_path(struct path *p)
 {
 	if (down_trylock(&p->test_lock))
-		return;		/* last test io still pending */
+		/* last test io still pending or device is suspended */
+		return;
 
 	p->test_bio->bi_sector = 0;
 	p->test_bio->bi_bdev = p->dev->bdev;
 	p->test_bio->bi_size = bdev_hardsect_size(p->dev->bdev);
 	p->test_bio->bi_idx = 0;
 
-	bio_list_add(&p->pg->m->test_ios, p->test_bio);
+	generic_make_request(p->test_bio);
 }
 
 /*-----------------------------------------------------------------
@@ -250,44 +252,32 @@
 static LIST_HEAD(_mpaths);
 static spinlock_t _mpath_lock = SPIN_LOCK_UNLOCKED;
 
-static void submit_ios(struct bio *bio)
-{
-	struct bio *next;
-	while (bio) {
-		next = bio->bi_next;
-		bio->bi_next = NULL;
-		generic_make_request(bio);
-		bio = next;
-	}
-}
-
 static void dispatch_failed_ios(struct multipath *m)
 {
 	unsigned long flags;
-	struct bio *bio;
+	struct bio *bio, *next;
 
 	spin_lock_irqsave(&m->failed_lock, flags);
 	bio = bio_list_get(&m->failed_ios);
 	spin_unlock_irqrestore(&m->failed_lock, flags);
 
-	submit_ios(bio);
+	while (bio) {
+		next = bio->bi_next;
+		bio->bi_next = NULL;
+		generic_make_request(bio);
+		bio = next;
+	}
 }
 
 static void iterate_paths(struct multipath *m, void (*fn)(struct path *p))
 {
 	struct priority_group *pg;
 	struct path *p;
-	unsigned long flags;
 
-	spin_lock_irqsave(&m->path_lock, flags);
 	list_for_each_entry (pg, &m->priority_groups, list) {
-		list_for_each_entry (p, &pg->valid_paths, list)
-			fn(p);
-
-		list_for_each_entry (p, &pg->invalid_paths, list)
+		list_for_each_entry (p, &pg->paths, list)
 			fn(p);
 	}
-	spin_unlock_irqrestore(&m->path_lock, flags);
 }
 
 #define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r))
@@ -303,7 +293,6 @@
 	list_for_each_entry (m, &_mpaths, list) {
 		dispatch_failed_ios(m);
 		iterate_paths(m, test_path);
-		submit_ios(bio_list_get(&m->test_ios));
 
 		if (atomic_dec_and_test(&m->trigger_event))
 			dm_table_event(m->ti->table);
@@ -495,7 +484,7 @@
 			goto bad;
 
 		path->pg = pg;
-		list_add_tail(&path->list, &pg->valid_paths);
+		list_add_tail(&path->list, &pg->paths);
 		consume(as, nr_params);
 	}
 
@@ -592,7 +581,6 @@
 {
 	struct multipath *m = (struct multipath *) ti->private;
 
-//	wait_for_scrub_ios(m);
 	spin_lock(&_mpath_lock);
 	list_del(&m->list);
 	spin_unlock(&_mpath_lock);
@@ -600,36 +588,40 @@
 	free_multipath(m);
 }
 
-static struct priority_group *__find_priority_group(struct list_head *pgs)
+/* Must hold m->current_lock */
+static int __choose_path(struct multipath *m)
 {
 	struct priority_group *pg;
+	struct path *path = NULL;
 
-	list_for_each_entry (pg, pgs, list) {
-		if (!list_empty(&pg->valid_paths))
-			return pg;
+	/* loop through the priority groups until we find a valid path. */
+	list_for_each_entry (pg, &m->priority_groups, list) {
+		path = pg->ps->type->select_path(pg->ps);
+		if (path)
+			break;
 	}
 
-	return NULL;
+	m->current_path = path;
+	m->current_count = m->min_io;
+	return 0;
 }
 
-static int __choose_path(struct multipath *m)
+static struct path *get_current_path(struct multipath *m)
 {
-	struct priority_group *pg;
 	struct path *path;
+	unsigned long flags;
 
-	/* get the priority group */
-	pg = __find_priority_group(&m->priority_groups);
-	if (!pg)
-		return -EIO;
+	spin_lock_irqsave(&m->current_lock, flags);
 
-	/* select a path */
-	path = pg->ps->type->select_path(pg->ps);
-	if (!path)
-		return -EIO;	/* No valid path found */
+	/* Do we need to select a new path? */
+	if (!m->current_path || --m->current_count == 0) {
+		__choose_path(m);
+	}
+	path = m->current_path;
 
-	m->current_path = path;
-	atomic_set(&m->count, m->min_io);
-	return 0;
+	spin_unlock_irqrestore(&m->current_lock, flags);
+
+	return path;
 }
 
 static int multipath_map(struct dm_target *ti, struct bio *bio,
@@ -637,23 +629,10 @@
 {
 	struct multipath *m = (struct multipath *) ti->private;
 	struct path *path;
-	unsigned long flags;
-
-	spin_lock_irqsave(&m->path_lock, flags);
-
-	/*
-	 * Do we need to choose a new path?
-	 */
-	if (!m->current_path || atomic_dec_and_test(&m->count)) {
-		if (__choose_path(m)) {
-			/* no paths */
-			spin_unlock_irqrestore(&m->path_lock, flags);
-			return -EIO;
-		}
-	}
 
-	path = m->current_path;
-	spin_unlock_irqrestore(&m->path_lock, flags);
+	path = get_current_path(m);
+	if (!path)
+		return -EIO;
 
 	/* map */
 	bio->bi_rw |= (1 << BIO_RW_FAILFAST);
@@ -664,46 +643,39 @@
 /*
  * Only called on the error path.
  */
-static struct path *__lookup_path(struct list_head *head,
-				  struct block_device *bdev)
-{
-	struct path *p;
-
-	list_for_each_entry (p, head, list)
-		if (p->dev->bdev == bdev)
-			return p;
-
-	return NULL;
-}
-
-static struct path *__find_path(struct multipath *m, struct block_device *bdev)
+static struct path *find_path(struct multipath *m, struct block_device *bdev)
 {
 	struct path *p;
 	struct priority_group *pg;
 
 	list_for_each_entry (pg, &m->priority_groups, list) {
-		p = __lookup_path(&pg->valid_paths, bdev);
-		if (p)
-			return p;
-
-		p = __lookup_path(&pg->invalid_paths, bdev);
-		if (p)
-			return p;
+		list_for_each_entry (p, &pg->paths, list) {
+			if (p->dev->bdev == bdev) {
+				return p;
+			}
+		}
 	}
 
 	return NULL;
 }
 
-static int __remap_io(struct multipath *m, struct bio *bio)
+static int remap_io(struct multipath *m, struct bio *bio)
 {
-	int r;
+	unsigned long flags;
+	struct path *path;
 
-	r = __choose_path(m);
-	if (r)
-		return r;
+	/* Force a new path to be chosen in case it
+	 * was the current_path that caused an error.
+	 */
+	spin_lock_irqsave(&m->current_lock, flags);
+	__choose_path(m);
+	spin_unlock_irqrestore(&m->current_lock, flags);
 
-	/* remap */
-	bio->bi_bdev = m->current_path->dev->bdev;
+	path = get_current_path(m);
+	if (!path)
+		return -EIO;
+
+	bio->bi_bdev = path->dev->bdev;
 	return 0;
 }
 
@@ -717,11 +689,9 @@
 	if (error) {
 		struct path *path;
 
-		spin_lock_irqsave(&m->path_lock, flags);
-		path = __find_path(m, bio->bi_bdev);
-		__fail_path(path);
-		r = __remap_io(m, bio);
-		spin_unlock_irqrestore(&m->path_lock, flags);
+		path = find_path(m, bio->bi_bdev);
+		fail_path(path);
+		r = remap_io(m, bio);
 
 		if (!r) {
 			/* queue for the daemon to resubmit */
@@ -752,7 +722,7 @@
 	iterate_paths(m, unlock_path);
 }
 
-static inline int __pg_count(struct multipath *m)
+static inline int pg_count(struct multipath *m)
 {
 	int count = 0;
 	struct priority_group *pg;
@@ -760,12 +730,11 @@
 	return count;
 }
 
-static inline int __path_count(struct priority_group *pg)
+static inline int path_count(struct priority_group *pg)
 {
 	int count = 0;
 	struct path *p;
-	list_for_each_entry(p, &pg->valid_paths, list) count++;
-	list_for_each_entry(p, &pg->invalid_paths, list) count++;
+	list_for_each_entry(p, &pg->paths, list) count++;
 	return count;
 }
 
@@ -786,38 +755,25 @@
 	struct path *p;
 	char buffer[32];
 
-	spin_lock_irqsave(&m->path_lock, flags);
-
 	switch (type) {
 	case STATUSTYPE_INFO:
-		sz += snprintf(result + sz, maxlen - sz, "%u ", __pg_count(m));
+		sz += snprintf(result + sz, maxlen - sz, "%u ", pg_count(m));
 
 		list_for_each_entry(pg, &m->priority_groups, list) {
 			sz += snprintf(result + sz, maxlen - sz, "%u %u ",
-                                       __path_count(pg),
+                                       path_count(pg),
 				       pg->ps->type->info_args);
 
-			list_for_each_entry(p, &pg->valid_paths, list) {
+			list_for_each_entry(p, &pg->paths, list) {
 				format_dev_t(buffer, p->dev->bdev->bd_dev);
+				spin_lock_irqsave(&p->failed_lock, flags);
 				sz += snprintf(result + sz, maxlen - sz,
-					       "%s A %u %u ", buffer,
-					       atomic_read(&p->fail_count),
-					       atomic_read(&p->fail_total));
-				pg->ps->type->status(pg->ps, p, type,
-						     result + sz, maxlen - sz);
-
-				sz = strlen(result);
-				if (sz >= maxlen)
-					break;
-			}
-			list_for_each_entry(p, &pg->invalid_paths, list) {
-				format_dev_t(buffer, p->dev->bdev->bd_dev);
-				sz += snprintf(result + sz, maxlen - sz,
-					       "%s F %u %u ", buffer,
-					       atomic_read(&p->fail_count),
-					       atomic_read(&p->fail_total));
+					       "%s %s %u %u ", buffer,
+					       p->has_failed ? "F" : "A",
+					       p->fail_count, p->fail_total);
 				pg->ps->type->status(pg->ps, p, type,
 						     result + sz, maxlen - sz);
+				spin_unlock_irqrestore(&p->failed_lock, flags);
 
 				sz = strlen(result);
 				if (sz >= maxlen)
@@ -829,25 +785,14 @@
 
 	case STATUSTYPE_TABLE:
 		sz += snprintf(result + sz, maxlen - sz, "%u %u ",
-			       m->test_interval, __pg_count(m));
+			       m->test_interval, pg_count(m));
 
 		list_for_each_entry(pg, &m->priority_groups, list) {
 			sz += snprintf(result + sz, maxlen - sz, "%u %s %u %u ",
 				       pg->priority, pg->ps->type->name,
-				       __path_count(pg), pg->ps->type->table_args);
-
-			list_for_each_entry(p, &pg->valid_paths, list) {
-				format_dev_t(buffer, p->dev->bdev->bd_dev);
-				sz += snprintf(result + sz, maxlen - sz,
-					       "%s ", buffer);
-				pg->ps->type->status(pg->ps, p, type,
-						     result + sz, maxlen - sz);
+				       path_count(pg), pg->ps->type->table_args);
 
-				sz = strlen(result);
-				if (sz >= maxlen)
-					break;
-			}
-			list_for_each_entry(p, &pg->invalid_paths, list) {
+			list_for_each_entry(p, &pg->paths, list) {
 				format_dev_t(buffer, p->dev->bdev->bd_dev);
 				sz += snprintf(result + sz, maxlen - sz,
 					       "%s ", buffer);
@@ -863,8 +808,6 @@
 		break;
 	}
 
-	spin_unlock_irqrestore(&m->path_lock, flags);
-
 	return 0;
 }
 





More information about the dm-devel mailing list