[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

[Cluster-devel] Patch: making DLM more robust



Hi,

An easy first step to make DLM more robust is to add time-out protection
to the lockspace creation operation while it waits for a "dlm_controld" action.
A new member, "ci_dlm_controld_secs", is added to "dlm_config" to set the time-out
in seconds; the default, DEFAULT_DLM_CTRL_SECS, is 5 seconds.

At the same time, signals can be enabled and handled, too.

DLM_USER_CREATE_LOCKSPACE will be able to return new error codes:
-EINTR or -ETIMEDOUT.

Could you please tell me why signals are blocked within "device_write()"?
I think it is safe to allow signals, certainly for your original code sequences,
which wait in an uninterruptible way.

BTW, "sigprocmask()" already calls "recalc_sigpending()", so the explicit call
that follows it here is redundant:

 out_sig:
    sigprocmask(SIG_SETMASK, &tmpsig, NULL);
    recalc_sigpending();


Thanks,

Zoltan Menyhart
diff -Nru linux-2.6.32.x86_64-old/fs/dlm/config.c linux-2.6.32.x86_64/fs/dlm/config.c
--- linux-2.6.32.x86_64-old/fs/dlm/config.c	2010-11-30 16:44:49.000000000 +0100
+++ linux-2.6.32.x86_64/fs/dlm/config.c	2010-11-30 17:12:00.000000000 +0100
@@ -99,6 +99,7 @@
 	unsigned int cl_log_debug;
 	unsigned int cl_protocol;
 	unsigned int cl_timewarn_cs;
+	unsigned int cl_dlm_controld_secs;	/* dlm_controld response time-out */
 };
 
 enum {
@@ -113,6 +114,7 @@
 	CLUSTER_ATTR_LOG_DEBUG,
 	CLUSTER_ATTR_PROTOCOL,
 	CLUSTER_ATTR_TIMEWARN_CS,
+	CLUSTER_ATTR_DLM_CTRL_SECS,		/* dlm_controld response time-out */
 };
 
 struct cluster_attribute {
@@ -165,6 +167,7 @@
 CLUSTER_ATTR(log_debug, 0);
 CLUSTER_ATTR(protocol, 0);
 CLUSTER_ATTR(timewarn_cs, 1);
+CLUSTER_ATTR(dlm_controld_secs, 1);
 
 static struct configfs_attribute *cluster_attrs[] = {
 	[CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
@@ -178,6 +181,7 @@
 	[CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr,
 	[CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr,
 	[CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr,
+	[CLUSTER_ATTR_DLM_CTRL_SECS] = &cluster_attr_dlm_controld_secs.attr,
 	NULL,
 };
 
@@ -438,6 +442,7 @@
 	cl->cl_log_debug = dlm_config.ci_log_debug;
 	cl->cl_protocol = dlm_config.ci_protocol;
 	cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs;
+	cl->cl_dlm_controld_secs = dlm_config.ci_dlm_controld_secs;
 
 	space_list = &sps->ss_group;
 	comm_list = &cms->cs_group;
@@ -1010,7 +1015,8 @@
 #define DEFAULT_SCAN_SECS          5
 #define DEFAULT_LOG_DEBUG          0
 #define DEFAULT_PROTOCOL           0
-#define DEFAULT_TIMEWARN_CS      500 /* 5 sec = 500 centiseconds */
+#define DEFAULT_TIMEWARN_CS      500	/* 5 sec = 500 centiseconds */
+#define DEFAULT_DLM_CTRL_SECS	   5	/* dlm_controld response time-out */
 
 struct dlm_config_info dlm_config = {
 	.ci_tcp_port = DEFAULT_TCP_PORT,
@@ -1023,6 +1029,7 @@
 	.ci_scan_secs = DEFAULT_SCAN_SECS,
 	.ci_log_debug = DEFAULT_LOG_DEBUG,
 	.ci_protocol = DEFAULT_PROTOCOL,
-	.ci_timewarn_cs = DEFAULT_TIMEWARN_CS
+	.ci_timewarn_cs = DEFAULT_TIMEWARN_CS,
+	.ci_dlm_controld_secs = DEFAULT_DLM_CTRL_SECS,
 };
 
diff -Nru linux-2.6.32.x86_64-old/fs/dlm/config.h linux-2.6.32.x86_64/fs/dlm/config.h
--- linux-2.6.32.x86_64-old/fs/dlm/config.h	2010-11-30 16:44:49.000000000 +0100
+++ linux-2.6.32.x86_64/fs/dlm/config.h	2010-11-30 17:15:43.000000000 +0100
@@ -28,6 +28,7 @@
 	int ci_log_debug;
 	int ci_protocol;
 	int ci_timewarn_cs;
+	int ci_dlm_controld_secs;	/* dlm_controld response time-out */
 };
 
 extern struct dlm_config_info dlm_config;
diff -Nru linux-2.6.32.x86_64-old/fs/dlm/lockspace.c linux-2.6.32.x86_64/fs/dlm/lockspace.c
--- linux-2.6.32.x86_64-old/fs/dlm/lockspace.c	2010-11-30 16:44:49.000000000 +0100
+++ linux-2.6.32.x86_64/fs/dlm/lockspace.c	2010-11-30 17:35:10.000000000 +0100
@@ -568,7 +568,12 @@
 	if (error)
 		goto out_stop;
 
-	wait_for_completion(&ls->ls_members_done);
+	error = wait_for_completion_interruptible_timeout(&ls->ls_members_done,
+					dlm_config.ci_dlm_controld_secs * HZ);
+	if (error){
+		error = signal_pending(current) ? -EINTR : -ETIMEDOUT;
+		goto out_members;
+	}
 	error = ls->ls_members_result;
 	if (error)
 		goto out_members;

[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]