[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

[Linux-cluster] [RFC][PATCH] Add ability to freeze a service.



Hi,

As discussed with lon on IRC, I'm trying to add to rgmanager the
ability to freeze a service. I have worked on it over the last few days
and wrote an example patch. Here is what I think a "freeze" can be; of
course, it can be implemented in many other ways, so this is only an
example.

== What does freeze mean? ==

All actions on the service are blocked (start, stop, status) so you can
work by hand on the various resources. When you unfreeze the service,
everything goes back to how it was before (so if you manually stopped a
resource, the status check will fail and the rg recovery is done).

== When can a service be frozen? ==

You can freeze a service only if its status is DISABLED, STOPPED, or
STARTED. It doesn't make sense to freeze a service that is in a
transitional state.

== How is it implemented? ==

*) As I don't want to lose the previous state, and I don't think it's a
service state, "freezed" is implemented as a service flag.
As a "service flag" didn't exist before, this patch adds it to
rg_state_t, so it will be transmitted around the cluster.

*) Two options are added to clusvcadm (-F to freeze, -U to unfreeze);
obviously these option names can be changed (perhaps they could be
long-only options like --freeze, --unfreeze?).
So you can freeze with:
	#clusvcadm -F $SERVICE
and unfreeze with:
	#clusvcadm -U $SERVICE

*) clustat reports these new flags in 2 ways: in normal mode the flags
are shown between (), and in long mode a new line "Flags:" is added. The
functions added in rg_strings.c aren't well tested but should work with
multiple flags.

*) !!!! In the patch I haven't changed the function
handle_start_remote_req because, looking at the code, I cannot find
where it is called. Maybe I'm missing something... :D


Thanks!

Bye!

-- 
Simone Gotti

 
 
 --
 Email.it, the professional e-mail, gratis per te: http://www.email.it/f
 
 Sponsor:
 Lo sai che hai un tesoro in soffitta? Quello che non serve più a te, può servire agli altri.
* Vendi GRATIS ciò che vuoi con AdBoom.it
 Clicca qui: http://adv.email.it/cgi-bin/foclick.cgi?mid=6418&d=23-4
Index: include/resgroup.h
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/include/resgroup.h,v
retrieving revision 1.19
diff -u -b -B -p -r1.19 resgroup.h
--- include/resgroup.h	20 Mar 2007 17:09:56 -0000	1.19
+++ include/resgroup.h	22 Apr 2007 18:09:40 -0000
@@ -34,6 +34,7 @@ typedef struct {
 	uint32_t	rs_state;	/**< State of service. */
 	uint32_t	rs_restarts;	/**< Number of cluster-induced 
 					     restarts */
+	uint32_t	rs_flags;	/**< User setted flags */
 	uint64_t	rs_transition;	/**< Last service transition time */
 } rg_state_t;
 
@@ -45,6 +46,7 @@ typedef struct {
 	swab32((ptr)->rs_last_owner);\
 	swab32((ptr)->rs_state);\
 	swab32((ptr)->rs_restarts);\
+	swab32((ptr)->rs_flags);\
 	swab64((ptr)->rs_transition);\
 }
 
@@ -79,6 +81,8 @@ typedef struct {
 #define RG_UNLOCK	  20
 #define RG_QUERY_LOCK	  21
 #define RG_MIGRATE	  22
+#define RG_FREEZE	  23
+#define RG_UNFREEZE	  24
 #define RG_NONE		  999
 
 const char *rg_req_str(int req);
@@ -105,7 +109,11 @@ int handle_start_remote_req(char *svcNam
 
 #define DEFAULT_CHECK_INTERVAL		10
 
+/* Resource group flags (for now) */
+#define RG_FLAG_FREEZED			(1<<0)	/** Resource freezed */
+
 const char *rg_state_str(int val);
+void rg_flags_str(char *flags_string, size_t size, int val);
 const char *agent_op_str(int val);
 
 int eval_groups(int local, uint32_t nodeid, int nodeStatus);
@@ -121,6 +129,8 @@ int svc_stop(char *svcName, int error);
 int svc_status(char *svcName);
 int svc_disable(char *svcName);
 int svc_fail(char *svcName);
+int svc_freeze(char *svcName);
+int svc_unfreeze(char *svcName);
 int svc_migrate(char *svcName, int target);
 int rt_enqueue_request(const char *resgroupname, int request,
 		       msgctx_t *resp_ctx,
@@ -162,6 +172,7 @@ cluster_member_list_t *member_list(void)
 int my_id(void);
 
 /* Return codes */
+#define RG_EFREEZED	-11		/* Service is freezed */
 #define RG_ERUN		-10		/* Service is already running */
 #define RG_EQUORUM	-9		/* Operation requires quorum */
 #define RG_EINVAL	-8		/* Invalid operation for resource */
Index: src/clulib/rg_strings.c
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/src/clulib/rg_strings.c,v
retrieving revision 1.7
diff -u -b -B -p -r1.7 rg_strings.c
--- src/clulib/rg_strings.c	10 Mar 2007 00:20:54 -0000	1.7
+++ src/clulib/rg_strings.c	22 Apr 2007 18:09:40 -0000
@@ -35,6 +35,7 @@ const struct string_val rg_error_strings
 	{ RG_ENOSERVICE,"Service does not exist" },
 	{ RG_EFORWARD,	"Service not mastered locally" },
 	{ RG_EABORT,	"Aborted; service failed" },
+	{ RG_EFREEZED,  "Failure: Service is freezed"},
 	{ RG_EFAIL,	"Failure" },
 	{ RG_ESUCCESS,	"Success" },
 	{ RG_YES,	"Yes" },
@@ -88,6 +89,12 @@ const struct string_val rg_state_strings
 };
 
 
+const struct string_val rg_flags_strings[] = {	/* RG_FLAG_* bit -> display name */
+	{RG_FLAG_FREEZED, "freezed"},
+	{0, NULL}	/* last entry: NULL string */
+};
+
+
 const struct string_val agent_ops[] = {
 	{RS_START, "start"},
 	{RS_STOP, "stop"},
@@ -122,6 +129,20 @@ rg_search_table(const struct string_val 
 }
 
 
+/* Return the string paired with val in table (ends at a NULL str), else "Unknown". */
+static inline const char *
+rg_flag_search_table(const struct string_val *table, int val)
+{
+	const struct string_val *entry;
+
+	for (entry = table; entry->str != NULL; entry++) {
+		if (entry->val != val)
+			continue;
+		return entry->str;
+	}
+	return "Unknown";
+}
+
 const char *
 rg_strerror(int val)
 {
@@ -134,6 +155,22 @@ rg_state_str(int val)
 	return rg_search_table(rg_state_strings, val);
 }
 
+/* Append the names of the flags set in val, ", "-separated, to the NUL-terminated string in flags_string (size bytes). */
+void
+rg_flags_str(char *flags_string, size_t size, int val)
+{
+	size_t i, used;
+	for (i = 0; i < sizeof(uint32_t) * 8; i++) {	/* walk every bit, not one per byte */
+		if (!((uint32_t)val & ((uint32_t)1 << i)))
+			continue;
+		used = strlen(flags_string);
+		if (used != 0 && used + 1 < size)	/* separator only between names, and only if room */
+			strncat(flags_string, ", ", size - used - 1);
+		used = strlen(flags_string);
+		if (used + 1 < size)	/* strncat's bound is the free space minus the NUL */
+			strncat(flags_string, rg_search_table(rg_flags_strings, (int)((uint32_t)1 << i)), size - used - 1);
+	}
+}
 
 const char *
 rg_req_str(int val)
Index: src/daemons/groups.c
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/src/daemons/groups.c,v
retrieving revision 1.31
diff -u -b -B -p -r1.31 groups.c
--- src/daemons/groups.c	19 Apr 2007 17:59:36 -0000	1.31
+++ src/daemons/groups.c	22 Apr 2007 18:09:41 -0000
@@ -376,6 +376,9 @@ consider_start(resource_node_t *node, ch
 	mp = memb_id_to_p(membership, my_id());
 	assert(mp);
 
+	/* Service cannot be started if Freezed */
+	if (svcStatus->rs_flags & RG_FLAG_FREEZED)
+		return;
 	/*
 	 * Service must be not be running elsewhere to consider for a
 	 * local start.
Index: src/daemons/rg_state.c
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/src/daemons/rg_state.c,v
retrieving revision 1.31
diff -u -b -B -p -r1.31 rg_state.c
--- src/daemons/rg_state.c	19 Apr 2007 17:59:36 -0000	1.31
+++ src/daemons/rg_state.c	22 Apr 2007 18:09:42 -0000
@@ -282,6 +282,7 @@ init_rg(char *name, rg_state_t *svcblk)
 	svcblk->rs_owner = 0;
 	svcblk->rs_last_owner = 0;
 	svcblk->rs_state = RG_STATE_STOPPED;
+       	svcblk->rs_flags = 0;
        	svcblk->rs_restarts = 0;
 	svcblk->rs_transition = 0;	
 	strncpy(svcblk->rs_name, name, sizeof(svcblk->rs_name));
@@ -418,6 +419,7 @@ get_rg_state_local(char *name, rg_state_
 		svcblk->rs_owner = 0;
 		svcblk->rs_last_owner = 0;
 		svcblk->rs_state = RG_STATE_UNINITIALIZED;
+       		svcblk->rs_flags = 0;
        		svcblk->rs_restarts = 0;
 		svcblk->rs_transition = 0;	
 		strncpy(svcblk->rs_name, name, sizeof(svcblk->rs_name));
@@ -446,6 +448,7 @@ get_rg_state_local(char *name, rg_state_
  *			2 = DO NOT stop service, return 0 (success)
  *                      3 = DO NOT stop service, return RG_EFORWARD
  *			4 = DO NOT stop service, return RG_EAGAIN
+ *			5 = DO NOT stop service, return RG_EFREEZED
  */
 int
 svc_advise_stop(rg_state_t *svcStatus, char *svcName, int req)
@@ -453,6 +456,11 @@ svc_advise_stop(rg_state_t *svcStatus, c
 	cluster_member_list_t *membership = member_list();
 	int ret = 0;
 	
+	if (svcStatus->rs_flags & RG_FLAG_FREEZED) {
+		clulog(LOG_DEBUG, "RG %s FREEZED!!!\n", svcName);
+		return 5;
+	}
+
 	switch(svcStatus->rs_state) {
 	case RG_STATE_FAILED:
 		if (req == RG_DISABLE)
@@ -568,6 +576,7 @@ svc_advise_stop(rg_state_t *svcStatus, c
  *			2 = DO NOT start service, return 0
  *			3 = DO NOT start service, return RG_EAGAIN
  *			4 = DO NOT start service, return RG_ERUN
+ *			5 = DO NOT start service, return RG_EFREEZED
  */
 int
 svc_advise_start(rg_state_t *svcStatus, char *svcName, int req)
@@ -575,6 +584,11 @@ svc_advise_start(rg_state_t *svcStatus, 
 	cluster_member_list_t *membership = member_list();
 	int ret = 0;
 	
+	if (svcStatus->rs_flags & RG_FLAG_FREEZED) {
+		clulog(LOG_DEBUG, "RG %s FREEZED!!!\n", svcName);
+		return 5;
+	}
+
 	switch(svcStatus->rs_state) {
 	case RG_STATE_FAILED:
 		clulog(LOG_ERR,
@@ -752,6 +766,9 @@ svc_start(char *svcName, int req)
 	case 4:
 		rg_unlock(&lockp);
 		return RG_ERUN;
+	case 5:
+		rg_unlock(&lockp);
+		return RG_EFREEZED;
 	default:
 		break;
 	}
@@ -914,6 +931,8 @@ svc_status(char *svcName)
 	}
 	rg_unlock(&lockp);
 
+	if (svcStatus.rs_flags & RG_FLAG_FREEZED)
+		return 0;
 	if (svcStatus.rs_owner != my_id())
 		/* Don't check status for anything not owned */
 		return 0;
@@ -961,6 +980,26 @@ svc_status(char *svcName)
 int
 svc_status_inquiry(char *svcName)
 {
+	struct dlm_lksb lockp;
+	rg_state_t svcStatus;
+	/* Take the cluster lock for this service before reading its state. */
+	if (rg_lock(svcName, &lockp) < 0) {
+		clulog(LOG_ERR, "#48: Unable to obtain cluster lock: %s\n",
+		       strerror(errno));
+		return RG_EFAIL;
+	}
+	/* If the state cannot be read, the lock is dropped before logging. */
+	if (get_rg_state(svcName, &svcStatus) != 0) {
+		rg_unlock(&lockp);
+		clulog(LOG_ERR, "#49: Failed getting status for RG %s\n",
+		       svcName);
+		return RG_EFAIL;
+	}
+	rg_unlock(&lockp);
+	/* A service flagged RG_FLAG_FREEZED returns 0 here, skipping the RG_STATUS group_op. */
+	if (svcStatus.rs_flags & RG_FLAG_FREEZED)
+		return 0;
+	
 	return group_op(svcName, RG_STATUS);
 }
 
@@ -1015,6 +1054,9 @@ _svc_stop(char *svcName, int req, int re
 	case 4:
 		rg_unlock(&lockp);
 		return RG_EAGAIN;
+	case 5:
+		rg_unlock(&lockp);
+		return RG_EFREEZED;
 	default:
 		break;
 	}
@@ -1191,6 +1233,76 @@ svc_fail(char *svcName)
 	return 0;
 }
 
+/**
+ * Set or clear RG_FLAG_FREEZED on a service (STOPPED/STARTED/DISABLED only).
+ * @param svcName	Service ID to flag as freezed/unfreezed.
+ * @param enabled	1 sets the flag (freeze); any other value clears it.
+ * @return		RG_EFAIL on lock/state error or disallowed state, else 0
+ */
+int
+_svc_freeze(char *svcName, int enabled)
+{
+	struct dlm_lksb lockp;
+	rg_state_t svcStatus;
+
+	if (rg_lock(svcName, &lockp) == RG_EFAIL) {
+		clulog(LOG_ERR, "#55: Unable to obtain cluster lock: %s\n",
+		       strerror(errno));
+		return RG_EFAIL;
+	}
+
+	clulog(LOG_DEBUG, "Handling %s request for RG %s\n", enabled?"freeze":"unfreeze", svcName);
+
+	if (get_rg_state(svcName, &svcStatus) != 0) {
+		rg_unlock(&lockp);
+		clulog(LOG_ERR, "#56: Failed getting status for RG %s\n",
+		       svcName);
+		return RG_EFAIL;
+	}
+
+	/* Any state other than the three listed below is refused. */
+	switch(svcStatus.rs_state) {
+	case RG_STATE_STOPPED:
+	case RG_STATE_STARTED:
+	case RG_STATE_DISABLED:
+
+		if (enabled == 1) {
+			clulog(LOG_DEBUG, "Freezing RG %s\n", svcName);
+			svcStatus.rs_flags |= RG_FLAG_FREEZED;
+		} else {
+			clulog(LOG_DEBUG, "Unfreezing RG %s\n", svcName);
+			svcStatus.rs_flags &= ~RG_FLAG_FREEZED;
+		}
+
+		if (set_rg_state(svcName, &svcStatus) != 0) {
+			rg_unlock(&lockp);
+			clulog(LOG_ERR, "#57: Failed changing RG status\n");
+			return RG_EFAIL;
+		}
+		break;
+
+	default:
+		rg_unlock(&lockp);
+		return RG_EFAIL;
+	}
+
+	rg_unlock(&lockp);
+
+	return 0;
+}
+/* Freeze svcName: returns the result of _svc_freeze(svcName, 1). */
+int
+svc_freeze(char *svcName)
+{
+	return _svc_freeze(svcName, 1);
+}
+/* Unfreeze svcName: returns the result of _svc_freeze(svcName, 0). */
+int
+svc_unfreeze(char *svcName)
+{
+	return _svc_freeze(svcName, 0);
+}
+
 
 /*
  * Send a message to the target node to start the service.
@@ -1324,6 +1436,9 @@ handle_relocate_req(char *svcName, int r
 			svc_fail(svcName);
 			return RG_EFAIL;
 		}
+		if (ret == RG_EFREEZED) {
+			return RG_EFREEZED;
+		}
 		if (ret == RG_EFORWARD)
 			return RG_EFORWARD;
 	}
@@ -1531,7 +1646,7 @@ handle_start_req(char *svcName, int req,
 	/* 
 	   If services are locked, return the error 
 	  */
-	if (ret == RG_EAGAIN || ret == RG_ERUN)
+	if (ret == RG_EAGAIN || ret == RG_ERUN || ret == RG_EFREEZED)
 		return ret;
 
 	/*
Index: src/daemons/rg_thread.c
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/src/daemons/rg_thread.c,v
retrieving revision 1.19
diff -u -b -B -p -r1.19 rg_thread.c
--- src/daemons/rg_thread.c	27 Mar 2007 19:33:20 -0000	1.19
+++ src/daemons/rg_thread.c	22 Apr 2007 18:09:43 -0000
@@ -422,6 +422,18 @@ resgroup_thread_main(void *arg)
 
 			break;
 
+		case RG_FREEZE:
+			error = svc_freeze(myname);
+			if (error != 0)
+				ret = RG_EFAIL;
+			break;
+
+		case RG_UNFREEZE:
+			error = svc_unfreeze(myname);
+			if (error != 0)
+				ret = RG_EFAIL;
+			break;
+
 		default:
 			printf("Unhandled request %d\n", req->rr_request);
 			ret = RG_NONE;
Index: src/utils/clustat.c
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/src/utils/clustat.c,v
retrieving revision 1.31
diff -u -b -B -p -r1.31 clustat.c
--- src/utils/clustat.c	6 Feb 2007 20:21:17 -0000	1.31
+++ src/utils/clustat.c	22 Apr 2007 18:09:43 -0000
@@ -416,7 +416,7 @@ void
 _txt_rg_state(rg_state_t *rs, cluster_member_list_t *members, int flags)
 {
 	char owner[31];
-
+	char flags_string[255] = "";
 
 	if (rs->rs_state == RG_STATE_STOPPED ||
 	    rs->rs_state == RG_STATE_DISABLED ||
@@ -430,19 +430,34 @@ _txt_rg_state(rg_state_t *rs, cluster_me
 		snprintf(owner, sizeof(owner), "%-.30s",
 			 my_memb_id_to_name(members, rs->rs_owner));
 	}
-	printf("  %-20.20s %-30.30s %-16.16s\n",
+	rg_flags_str(flags_string, sizeof(flags_string), rs->rs_flags);
+	printf("  %-20.20s %-30.30s %-16.16s",
 	       rs->rs_name,
 	       owner,
 	       rg_state_str(rs->rs_state));
+	if(strlen(flags_string))
+		printf ("(%s)\n", flags_string);
+	else
+		printf("\n");
 }
 
 
 void
 _txt_rg_state_v(rg_state_t *rs, cluster_member_list_t *members, int flags)
 {
+	char flags_string[255] = "";
+
+	rg_flags_str(flags_string, sizeof(flags_string), rs->rs_flags);
+
 	printf("Service Name      : %s\n", rs->rs_name);
 	printf("  Current State   : %s (%d)\n",
 	       rg_state_str(rs->rs_state), rs->rs_state);
+	if (rs->rs_flags)
+		printf("  Flags           : %s (%d)\n",
+		       flags_string, rs->rs_flags);
+	else
+		printf("  Flags           : none (%d)\n",
+		       rs->rs_flags);
 	printf("  Owner           : %s\n",
 	       my_memb_id_to_name(members, rs->rs_owner));
 	printf("  Last Owner      : %s\n",
Index: src/utils/clusvcadm.c
===================================================================
RCS file: /cvs/cluster/cluster/rgmanager/src/utils/clusvcadm.c,v
retrieving revision 1.18
diff -u -b -B -p -r1.18 clusvcadm.c
--- src/utils/clusvcadm.c	20 Mar 2007 17:09:57 -0000	1.18
+++ src/utils/clusvcadm.c	22 Apr 2007 18:09:44 -0000
@@ -240,7 +240,7 @@ main(int argc, char **argv)
 		return 1;
 	}
 
-	while ((opt = getopt(argc, argv, "lSue:M:d:r:n:m:vR:s:qh?")) != EOF) {
+	while ((opt = getopt(argc, argv, "lSue:M:d:r:n:m:vR:s:F:U:qh?")) != EOF) {
 		switch (opt) {
 		case 'l':
 			return do_lock();
@@ -294,6 +294,16 @@ main(int argc, char **argv)
 		case 'v':
 			printf("%s\n",PACKAGE_VERSION);
 			return 0;
+		case 'F':
+			actionstr = "freezing";
+			action = RG_FREEZE;
+			svcname = optarg;
+			break;
+		case 'U':
+			actionstr = "unfreezing";
+			action = RG_UNFREEZE;
+			svcname = optarg;
+			break;
 		case 'q':
 			close(STDOUT_FILENO);
 			break;

[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]