[dm-devel] [PATCH] Handle multipath paths in a path group properly during pg_init
Moger, Babu
Babu.Moger at lsi.com
Mon Jun 15 22:16:58 UTC 2009
I have seen this race condition earlier on my system. Problem is resolved after testing with this patch. Thanks.
--Babu Moger
> -----Original Message-----
> From: Chandra Seetharaman [mailto:sekharan at us.ibm.com]
> Sent: Friday, June 05, 2009 7:05 PM
> To: device-mapper development
> Cc: Alasdair G Kergon; Moger, Babu
> Subject: Re: [dm-devel] [PATCH] Handle multipath paths in a path group
> properly during pg_init
>
> Found a race condition where pg_init_in_progress was being incremented even
> when queue_work() returned without queuing the work.
>
> Changed the code to increment pg_init_in_progress only when queue_work
> returns success.
>
> Tested in 2.6.30-rc8
>
> ---------------------------------------------------
> From: Chandra Seetharaman <sekharan at us.ibm.com>
>
> Fixed a problem affecting reinstatement of passive paths and another
> problem with io lockup during device offline/online.
>
> Before we moved the hardware handler from dm to SCSI, it performed a
> pg_init
> for a path group and didn't maintain any state about each path in hardware
> handler code.
>
> But in SCSI dh, such state is now maintained, as we want to fail I/O early
> on a
> path if it is not the active path.
>
> All the hardware handlers have a state now and set to active or some form
> of
> inactive. They have prep_fn() which uses this state to fail the I/O
> without
> it ever being sent to the device.
>
> So in effect when dm-multipath calls scsi_dh_activate(), activate is
> sent to only one path and the "state" of that path is changed
> appropriately
> to "active" while other paths in the same path group are never changed
> as they never got an "activate".
>
> In order to make sure all the paths in a path group get their state set
> properly when a pg_init happens, we need to call scsi_dh_activate() on
> all the paths in the path group.
>
> Doing this at the hardware handler layer is not a good option as we
> want the multipath layer to define the relationship between paths and
> path groups and not the hardware handler.
>
> The attached patch sends an "activate" on each path in a path group when a
> path group is switched. It also sends an activate when a path is
> reinstated.
>
> Signed-off-by: Chandra Seetharaman <sekharan at us.ibm.com>
> Signed-off-by: Alasdair G Kergon <agk at redhat.com>
>
> ---
> drivers/md/dm-mpath.c | 63 +++++++++++++++++++++-----------------------
> ------
> 1 file changed, 26 insertions(+), 37 deletions(-)
>
> Index: linux-2.6.29/drivers/md/dm-mpath.c
> ===================================================================
> --- linux-2.6.29.orig/drivers/md/dm-mpath.c
> +++ linux-2.6.29/drivers/md/dm-mpath.c
> @@ -35,6 +35,7 @@ struct pgpath {
>
> struct dm_path path;
> struct work_struct deactivate_path;
> + struct work_struct activate_path;
> };
>
> #define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path)
> @@ -64,8 +65,6 @@ struct multipath {
> spinlock_t lock;
>
> const char *hw_handler_name;
> - struct work_struct activate_path;
> - struct pgpath *pgpath_to_activate;
> unsigned nr_priority_groups;
> struct list_head priority_groups;
> unsigned pg_init_required; /* pg_init needs calling? */
> @@ -128,6 +127,7 @@ static struct pgpath *alloc_pgpath(void)
> if (pgpath) {
> pgpath->is_active = 1;
> INIT_WORK(&pgpath->deactivate_path, deactivate_path);
> + INIT_WORK(&pgpath->activate_path, activate_path);
> }
>
> return pgpath;
> @@ -160,7 +160,6 @@ static struct priority_group *alloc_prio
>
> static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti)
> {
> - unsigned long flags;
> struct pgpath *pgpath, *tmp;
> struct multipath *m = ti->private;
>
> @@ -169,10 +168,6 @@ static void free_pgpaths(struct list_hea
> if (m->hw_handler_name)
> scsi_dh_detach(bdev_get_queue(pgpath->path.dev->bdev));
> dm_put_device(ti, pgpath->path.dev);
> - spin_lock_irqsave(&m->lock, flags);
> - if (m->pgpath_to_activate == pgpath)
> - m->pgpath_to_activate = NULL;
> - spin_unlock_irqrestore(&m->lock, flags);
> free_pgpath(pgpath);
> }
> }
> @@ -202,7 +197,6 @@ static struct multipath *alloc_multipath
> m->queue_io = 1;
> INIT_WORK(&m->process_queued_ios, process_queued_ios);
> INIT_WORK(&m->trigger_event, trigger_event);
> - INIT_WORK(&m->activate_path, activate_path);
> m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache);
> if (!m->mpio_pool) {
> kfree(m);
> @@ -427,8 +421,8 @@ static void process_queued_ios(struct wo
> {
> struct multipath *m =
> container_of(work, struct multipath, process_queued_ios);
> - struct pgpath *pgpath = NULL;
> - unsigned init_required = 0, must_queue = 1;
> + struct pgpath *pgpath = NULL, *tmp;
> + unsigned must_queue = 1;
> unsigned long flags;
>
> spin_lock_irqsave(&m->lock, flags);
> @@ -446,19 +440,15 @@ static void process_queued_ios(struct wo
> must_queue = 0;
>
> if (m->pg_init_required && !m->pg_init_in_progress && pgpath) {
> - m->pgpath_to_activate = pgpath;
> m->pg_init_count++;
> m->pg_init_required = 0;
> - m->pg_init_in_progress = 1;
> - init_required = 1;
> + list_for_each_entry(tmp, &pgpath->pg->pgpaths, list) {
> + if (queue_work(kmpath_handlerd, &tmp->activate_path))
> + m->pg_init_in_progress++;
> + }
> }
> -
> out:
> spin_unlock_irqrestore(&m->lock, flags);
> -
> - if (init_required)
> - queue_work(kmpath_handlerd, &m->activate_path);
> -
> if (!must_queue)
> dispatch_queued_ios(m);
> }
> @@ -924,9 +914,13 @@ static int reinstate_path(struct pgpath
>
> pgpath->is_active = 1;
>
> - m->current_pgpath = NULL;
> - if (!m->nr_valid_paths++ && m->queue_size)
> + if (!m->nr_valid_paths++ && m->queue_size) {
> + m->current_pgpath = NULL;
> queue_work(kmultipathd, &m->process_queued_ios);
> + } else if (m->hw_handler_name && (m->current_pg == pgpath->pg)) {
> + if (queue_work(kmpath_handlerd, &pgpath->activate_path))
> + m->pg_init_in_progress++;
> + }
>
> dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
> pgpath->path.dev->name, m->nr_valid_paths);
> @@ -1102,35 +1096,30 @@ static void pg_init_done(struct dm_path
>
> spin_lock_irqsave(&m->lock, flags);
> if (errors) {
> - DMERR("Could not failover device. Error %d.", errors);
> - m->current_pgpath = NULL;
> - m->current_pg = NULL;
> + if (pgpath == m->current_pgpath) {
> + DMERR("Could not failover device. Error %d.", errors);
> + m->current_pgpath = NULL;
> + m->current_pg = NULL;
> + }
> } else if (!m->pg_init_required) {
> m->queue_io = 0;
> pg->bypassed = 0;
> }
>
> - m->pg_init_in_progress = 0;
> - queue_work(kmultipathd, &m->process_queued_ios);
> + m->pg_init_in_progress--;
> + if (!m->pg_init_in_progress)
> + queue_work(kmultipathd, &m->process_queued_ios);
> spin_unlock_irqrestore(&m->lock, flags);
> }
>
> static void activate_path(struct work_struct *work)
> {
> int ret;
> - struct multipath *m =
> - container_of(work, struct multipath, activate_path);
> - struct dm_path *path;
> - unsigned long flags;
> + struct pgpath *pgpath =
> + container_of(work, struct pgpath, activate_path);
>
> - spin_lock_irqsave(&m->lock, flags);
> - path = &m->pgpath_to_activate->path;
> - m->pgpath_to_activate = NULL;
> - spin_unlock_irqrestore(&m->lock, flags);
> - if (!path)
> - return;
> - ret = scsi_dh_activate(bdev_get_queue(path->dev->bdev));
> - pg_init_done(path, ret);
> + ret = scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev));
> + pg_init_done(&pgpath->path, ret);
> }
>
> /*
>
>
>
More information about the dm-devel
mailing list