[dm-devel] Re: dm snapshot: stop merging using a completion
Mikulas Patocka
mpatocka at redhat.com
Mon Dec 7 11:22:28 UTC 2009
On Sun, 6 Dec 2009, Mike Snitzer wrote:
> On Sat, Dec 05 2009 at 9:01pm -0500,
> Mikulas Patocka <mpatocka at redhat.com> wrote:
>
> > On Sat, 5 Dec 2009, Mike Snitzer wrote:
> >
> > > Switch stop_merge() from using a busy loop to a completion event.
> > >
> > > stop_merge() now requests merging be shutdown using the
> > > 'merge_completion' pointer (instead of the 'merge_shutdown' flag). This
> > > is accomplished by testing if 'merge_completion' is not NULL in
> > > snapshot_merge_process(). stop_merge() allocates its completion on the
> > > stack and assigns it to the 'merge_completion' pointer in the snapshot.
> > > 'merge_completion' is protected by the snapshot's lock.
> > >
> > > Also changed the 'merge_running' flag from int to atomic_t.
> >
> > No, there's a bug:
> >
> > > static void stop_merge(struct dm_snapshot *s)
> > > {
> > > - while (s->merge_running) {
> > > - s->merge_shutdown = 1;
> > > - msleep(1);
> > > + DECLARE_COMPLETION_ONSTACK(merge_stopped);
> > > + if (atomic_read(&s->merge_running)) {
> >
> > --- if the merge stops exactly at this point (because it gets finished or
> > because of an i/o error), we are waiting for a completion that will be
> > never signalled.
>
> Yes, valid point. But for this rare corner case we could just use
> wait_for_completion_timeout() with a fairly large timeout; like 30 sec?
> That actually isn't a great option (racey)...
>
> How about if the 'shut:' code paths also checked for s->merge_completion
> and complete() if it is not NULL? Which means that check and related
> complete() code would become a function.
>
> > > + down_write(&s->lock);
> > > + s->merge_completion = &merge_stopped;
> > > + up_write(&s->lock);
> > > + wait_for_completion(&merge_stopped);
> > > }
> > > - s->merge_shutdown = 0;
> > > }
> > >
> > > /*
> >
> > For Alasdair: do you get the problem? If I write it with msleep()
> > correctly, you keep on complaining how unclean it is --- if it is written
> > with completions and it is wrong (because they are just harder to use
> > correctly than simple variables and msleep), you tend to support it. Now
> > you see in practice how complex constructs tend to trigger bugs.
> >
> > Mike: I thought that the completion would be in struct dm_snapshot. But
> > maybe, try it with wait_on_bit / wake_up_bit / test_bit / set_bit etc., it
> > may be easier than completions.
>
> I can look at it; but I think using a completion can work.
>
> Mike
Here it is with bits:
---
Use bits instead of variables.
Signed-off-by: Mikulas Patocka <mpatocka at redhat.com>
---
drivers/md/dm-snap.c | 39 +++++++++++++++++++++++----------------
1 file changed, 23 insertions(+), 16 deletions(-)
Index: linux-2.6.32-devel/drivers/md/dm-snap.c
===================================================================
--- linux-2.6.32-devel.orig/drivers/md/dm-snap.c 2009-12-07 11:37:53.000000000 +0100
+++ linux-2.6.32-devel/drivers/md/dm-snap.c 2009-12-07 11:58:38.000000000 +0100
@@ -102,12 +102,14 @@ struct dm_snapshot {
spinlock_t tracked_chunk_lock;
struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];
+ unsigned long bits;
+
/* Merge operation is in progress */
- int merge_running;
+#define MERGE_RUNNING 0
/* It is requested to shut down merging */
/* Cleared back to 0 when the merging is stopped */
- int merge_shutdown;
+#define SHUTDOWN_MERGE 1
/* Merging this area --- block any writes */
chunk_t merge_write_interlock;
@@ -762,8 +764,8 @@ static void snapshot_merge_process(struc
int must_wait;
struct dm_io_region src, dest;
- BUG_ON(!s->merge_running);
- if (s->merge_shutdown)
+ BUG_ON(!test_bit(MERGE_RUNNING, &s->bits));
+ if (unlikely(test_bit(SHUTDOWN_MERGE, &s->bits)))
goto shut;
if (!s->valid) {
@@ -823,7 +825,9 @@ test_again:
return;
shut:
- s->merge_running = 0;
+ clear_bit_unlock(MERGE_RUNNING, &s->bits);
+ smp_mb__after_clear_bit();
+ wake_up_bit(&s->bits, MERGE_RUNNING);
}
/* This function drops s->lock */
@@ -904,15 +908,21 @@ static void merge_callback(int read_err,
shut:
down_write(&s->lock);
release_write_interlock(s, 1);
- s->merge_running = 0;
+ clear_bit_unlock(MERGE_RUNNING, &s->bits);
+ smp_mb__after_clear_bit();
+ wake_up_bit(&s->bits, MERGE_RUNNING);
}
static void start_merge(struct dm_snapshot *s)
{
- if (!s->merge_running && !s->merge_shutdown) {
- s->merge_running = 1;
+ if (!test_and_set_bit(MERGE_RUNNING, &s->bits))
snapshot_merge_process(s);
- }
+}
+
+static int wait_schedule(void *ptr)
+{
+ schedule();
+ return 0;
}
/*
@@ -920,11 +930,9 @@ static void start_merge(struct dm_snapsh
*/
static void stop_merge(struct dm_snapshot *s)
{
- while (s->merge_running) {
- s->merge_shutdown = 1;
- msleep(1);
- }
- s->merge_shutdown = 0;
+ set_bit(SHUTDOWN_MERGE, &s->bits);
+ wait_on_bit(&s->bits, MERGE_RUNNING, wait_schedule, TASK_UNINTERRUPTIBLE);
+ clear_bit(SHUTDOWN_MERGE, &s->bits);
}
/*
@@ -991,8 +999,7 @@ static int snapshot_ctr(struct dm_target
init_rwsem(&s->lock);
INIT_LIST_HEAD(&s->list);
spin_lock_init(&s->pe_lock);
- s->merge_running = 0;
- s->merge_shutdown = 0;
+ s->bits = 0;
s->merge_write_interlock = 0;
s->merge_write_interlock_n = 0;
bio_list_init(&s->merge_write_list);
More information about the dm-devel
mailing list