[dm-devel] [PATCH] multipath: clean up code for stopping the waiter threads

Benjamin Marzinski bmarzins at redhat.com
Sat May 19 06:37:03 UTC 2012


The way multipathd currently stops the waiter threads needs some work.
Right now they are stopped by being sent the SIGUSR1 signal. However their
cleanup code assumes that they are being cancelled, just like all the other
threads are.  There's no reason for them to be so unnecessarily
complicated and different from the other threads

This patch does a couple of things.  First, it removes the mutex from
the event_thread.  This wasn't doing anything. It was designed to protect
the wp->mapname variable, which the waiter threads were checking to see
if they should quit. However, the mutex was only ever being used by the
thread itself, and it clearly didn't need to serialize with itself.  Also,
the function to clear the mapname, signal_waiter(), was set with
pthread_cleanup_push(), which never got called early, since the threads
weren't being cancelled.  Thus, the mapname never got cleared
until the pthreads were about to shut down.

The patch also rips out all the signal stopping code, and just uses
pthread_cancel.  There already are cancellation points in the waiter
thread code. Between the cancellation points, both explicit and implicit,
and the fact that the waiter threads will never be killed except when the
killer is holding the vecs lock, there shouldn't be any place where the
waiter thread can access freed data.

To make sure the waiter thread cleans itself up properly, the dmt
has been moved into the event_thread structure, and is destroyed in
free_waiter() if necessary.

Signed-off-by: Benjamin Marzinski <bmarzins at redhat.com>
---
 libmultipath/waiter.c |   74 +++++++++++---------------------------------------
 libmultipath/waiter.h |    4 +-
 2 files changed, 19 insertions(+), 59 deletions(-)

Index: multipath-tools-120518/libmultipath/waiter.c
===================================================================
--- multipath-tools-120518.orig/libmultipath/waiter.c
+++ multipath-tools-120518/libmultipath/waiter.c
@@ -29,23 +29,17 @@ struct event_thread *alloc_waiter (void)
 
 	wp = (struct event_thread *)MALLOC(sizeof(struct event_thread));
 	memset(wp, 0, sizeof(struct event_thread));
-	pthread_mutex_init(&wp->lock, NULL);
 
 	return wp;
 }
 
-void signal_waiter (void *data)
+void free_waiter (void *data)
 {
 	struct event_thread *wp = (struct event_thread *)data;
 
-	pthread_mutex_lock(&wp->lock);
-	memset(wp->mapname, 0, WWID_SIZE);
-	pthread_mutex_unlock(&wp->lock);
-}
+	if (wp->dmt)
+		dm_task_destroy(wp->dmt);
 
-void free_waiter (struct event_thread *wp)
-{
-	pthread_mutex_destroy(&wp->lock);
 	FREE(wp);
 }
 
@@ -58,83 +52,56 @@ void stop_waiter_thread (struct multipat
 	}
 	condlog(2, "%s: stop event checker thread (%lu)", mpp->alias,
 		mpp->waiter);
-	pthread_kill(mpp->waiter, SIGUSR1);
+	pthread_cancel(mpp->waiter);
 	mpp->waiter = (pthread_t)0;
 }
 
-static sigset_t unblock_signals(void)
-{
-	sigset_t set, old;
-
-	sigemptyset(&set);
-	sigaddset(&set, SIGHUP);
-	sigaddset(&set, SIGUSR1);
-	pthread_sigmask(SIG_UNBLOCK, &set, &old);
-	return old;
-}
-
 /*
  * returns the reschedule delay
  * negative means *stop*
  */
 int waiteventloop (struct event_thread *waiter)
 {
-	sigset_t set;
-	struct dm_task *dmt = NULL;
 	int event_nr;
 	int r;
 
-	pthread_mutex_lock(&waiter->lock);
 	if (!waiter->event_nr)
 		waiter->event_nr = dm_geteventnr(waiter->mapname);
 
-	if (!(dmt = dm_task_create(DM_DEVICE_WAITEVENT))) {
+	if (!(waiter->dmt = dm_task_create(DM_DEVICE_WAITEVENT))) {
 		condlog(0, "%s: devmap event #%i dm_task_create error",
 				waiter->mapname, waiter->event_nr);
-		pthread_mutex_unlock(&waiter->lock);
 		return 1;
 	}
 
-	if (!dm_task_set_name(dmt, waiter->mapname)) {
+	if (!dm_task_set_name(waiter->dmt, waiter->mapname)) {
 		condlog(0, "%s: devmap event #%i dm_task_set_name error",
 				waiter->mapname, waiter->event_nr);
-		dm_task_destroy(dmt);
-		pthread_mutex_unlock(&waiter->lock);
+		dm_task_destroy(waiter->dmt);
+		waiter->dmt = NULL;
 		return 1;
 	}
 
-	if (waiter->event_nr && !dm_task_set_event_nr(dmt,
+	if (waiter->event_nr && !dm_task_set_event_nr(waiter->dmt,
 						      waiter->event_nr)) {
 		condlog(0, "%s: devmap event #%i dm_task_set_event_nr error",
 				waiter->mapname, waiter->event_nr);
-		dm_task_destroy(dmt);
-		pthread_mutex_unlock(&waiter->lock);
+		dm_task_destroy(waiter->dmt);
+		waiter->dmt = NULL;
 		return 1;
 	}
-	pthread_mutex_unlock(&waiter->lock);
 
-	dm_task_no_open_count(dmt);
-
-	/* accept wait interruption */
-	set = unblock_signals();
+	dm_task_no_open_count(waiter->dmt);
 
 	/* wait */
-	r = dm_task_run(dmt);
-
-	/* wait is over : event or interrupt */
-	pthread_sigmask(SIG_SETMASK, &set, NULL);
+	r = dm_task_run(waiter->dmt);
 
-	dm_task_destroy(dmt);
+	dm_task_destroy(waiter->dmt);
+	waiter->dmt = NULL;
 
 	if (!r)	/* wait interrupted by signal */
 		return -1;
 
-	pthread_mutex_lock(&waiter->lock);
-	if (!strlen(waiter->mapname)) {
-		/* waiter should exit */
-		pthread_mutex_unlock(&waiter->lock);
-		return -1;
-	}
 	waiter->event_nr++;
 
 	/*
@@ -164,20 +131,16 @@ int waiteventloop (struct event_thread *
 		if (r) {
 			condlog(2, "%s: event checker exit",
 				waiter->mapname);
-			pthread_mutex_unlock(&waiter->lock);
 			return -1; /* stop the thread */
 		}
 
 		event_nr = dm_geteventnr(waiter->mapname);
 
-		if (waiter->event_nr == event_nr) {
-			pthread_mutex_unlock(&waiter->lock);
+		if (waiter->event_nr == event_nr)
 			return 1; /* upon problem reschedule 1s later */
-		}
 
 		waiter->event_nr = event_nr;
 	}
-	pthread_mutex_unlock(&waiter->lock);
 	return -1; /* never reach there */
 }
 
@@ -189,7 +152,7 @@ void *waitevent (void *et)
 	mlockall(MCL_CURRENT | MCL_FUTURE);
 
 	waiter = (struct event_thread *)et;
-	pthread_cleanup_push(signal_waiter, et);
+	pthread_cleanup_push(free_waiter, et);
 
 	block_signal(SIGUSR1, NULL);
 	block_signal(SIGHUP, NULL);
@@ -203,7 +166,6 @@ void *waitevent (void *et)
 	}
 
 	pthread_cleanup_pop(1);
-	free_waiter(waiter);
 	return NULL;
 }
 
@@ -219,10 +181,8 @@ int start_waiter_thread (struct multipat
 	if (!wp)
 		goto out;
 
-	pthread_mutex_lock(&wp->lock);
 	strncpy(wp->mapname, mpp->alias, WWID_SIZE);
 	wp->vecs = vecs;
-	pthread_mutex_unlock(&wp->lock);
 
 	if (pthread_create(&wp->thread, &waiter_attr, waitevent, wp)) {
 		condlog(0, "%s: cannot create event checker", wp->mapname);
Index: multipath-tools-120518/libmultipath/waiter.h
===================================================================
--- multipath-tools-120518.orig/libmultipath/waiter.h
+++ multipath-tools-120518/libmultipath/waiter.h
@@ -4,15 +4,15 @@
 extern pthread_attr_t waiter_attr;
 
 struct event_thread {
+	struct dm_task *dmt;
 	pthread_t thread;
-	pthread_mutex_t lock;
 	int event_nr;
 	char mapname[WWID_SIZE];
 	struct vectors *vecs;
 };
 
 struct event_thread * alloc_waiter (void);
-void signal_waiter (void *data);
+void free_waiter (void *data);
 void stop_waiter_thread (struct multipath *mpp, struct vectors *vecs);
 int start_waiter_thread (struct multipath *mpp, struct vectors *vecs);
 int waiteventloop (struct event_thread *waiter);




More information about the dm-devel mailing list