[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

Re: [Cluster-devel] rind-0.8.1 patch



Hi Lon,
finally I had time looking at this patch and adapted your example for the 
follow-service a little bit.

Besides the fact that the event triggering is running as expected, I stumbled 
over some minor changes (find patch attached).

1. Isn't it better to organize the configuration as follows:
  <event name="followservice_node" class="node" 
file="/usr/local/cluster/follow-service.sl">
                        follow_service("service:ip_a", "service:ip_b", "ip_a", 
1);
    </event>
Now you can use the follow_service function as a library function and make the 
implementation in the cluster.conf (this is already integrated in the patch).

I would also like something like this:
  <event name="followservice_node" class="node">      
        <file="/usr/local/cluster/another-lib.sl">
        <file="/usr/local/cluster/follow-service.sl">
        follow_service("service:ip_a", "service:ip_b", "ip_a", 1);
    </event>
This would make using sl-files very modular. I didn't yet have time to 
implement it but wanted to hear what you are thinking.

2. I found that the sl-function nodes_online() also reports a node as online if 
the node in question is in the cluster but has no rgmanager running. For me it 
was enough to change the line in rgmanager/src/daemons/slang_event.c:606 :
-               if (membership->cml_members[i].cn_member &&
+               if (membership->cml_members[i].cn_member > 0 &&
But I'm not sure if this is right. For me it worked perfectly well ;-) .

Next, I reimplemented your example on follow-service and made it more 
general. Some cases might still not be handled, but all my tests (which were 
not too many up to now) didn't show any problems. I will hand it over to the 
SAP guys this week to let them see if this suits their requirements for 
master/slave queue replication (find the example attached).

I hope this feedback helps.

Regards Marc.


On Friday 30 November 2007 17:49:05 Lon Hohberger wrote:
> Minor bugfixes.
>
> -- Lon



-- 
Gruss / Regards,

Marc Grimme
http://www.atix.de/               http://www.open-sharedroot.org/
diff -Naur rgmanager.old/src/daemons/event_config.c rgmanager/src/daemons/event_config.c
--- rgmanager.old/src/daemons/event_config.c	2007-12-18 18:52:56.000000000 +0100
+++ rgmanager/src/daemons/event_config.c	2008-02-04 17:13:23.000000000 +0100
@@ -338,14 +338,13 @@
 		 base, idx);
 	if (ccs_get(ccsfd, xpath, &ret) == 0) {
 		ev->ev_script_file = ret;
+	}
+	snprintf(xpath, sizeof(xpath), "%s/event[%d]",
+	         base, idx);
+	if (ccs_get(ccsfd, xpath, &ret) == 0) {
+		ev->ev_script = ret;
 	} else {
-		snprintf(xpath, sizeof(xpath), "%s/event[%d]",
-		         base, idx);
-		if (ccs_get(ccsfd, xpath, &ret) == 0) {
-			ev->ev_script = ret;
-		} else {
-			goto out_fail;
-		}
+		goto out_fail;
 	}
 
 	/* Get the priority ordering (must be nonzero) */
diff -Naur rgmanager.old/src/daemons/slang_event.c rgmanager/src/daemons/slang_event.c
--- rgmanager.old/src/daemons/slang_event.c	2008-02-01 16:15:02.000000000 +0100
+++ rgmanager/src/daemons/slang_event.c	2008-02-04 17:14:08.000000000 +0100
@@ -606,7 +606,7 @@
 
 	nodecount = 0;
 	for (i = 0; i < membership->cml_count; i++) {
-		if (membership->cml_members[i].cn_member &&
+		if (membership->cml_members[i].cn_member > 0 &&
 		    membership->cml_members[i].cn_nodeid != 0) {
 			nodes[nodecount] = membership->cml_members[i].cn_nodeid;
 			++nodecount;
@@ -1050,7 +1050,8 @@
 
 	if (file) 
 		ret = SLang_load_file((char *)file);
-	else
+
+	if ((ret >= 0) && script)
 		ret = SLang_load_string((char *)script);
 
 	if (ret < 0) {
%
% Returns a list of nodes for the given service that are online and in the failoverdomain.
%
define nodelist_online(service_name) {
   variable online_nodes, domain_nodes;
   variable nofailback, restricted, ordered;

   % Nodes currently reported as online.
   online_nodes = nodes_online();

   % service_domain_info() hands back four values; only the node list of the
   % failover domain is needed here, the three flags are read and ignored.
   (nofailback, restricted, ordered, domain_nodes) = service_domain_info(service_name);

   % Keep only the nodes that appear in both lists.
   return intersection(online_nodes, domain_nodes);
}

%
% Idea: 
%   General-purpose function for a construct where Service(svc1) and Service(svc2) 
%   should not be running on the same node, even after failover.
%   There are two options to influence the behaviour. If both services would have 
%   to run on the same node (only one node is left in the failover group), master 
%   decides which service survives and whether both services keep running or only 
%   the master service does. If master is neither svc1 nor svc2, both services may 
%   run on the same node. If master is either svc1 or svc2, the specified one will 
%   be the surviving service.
%   If followslave is greater than 0, svc1 always follows svc2. That means it will 
%   be started on the same node as svc2. And, if another node is available, svc2 
%   will be relocated to it.
%
%
% follow_service(svc1, svc2, master, followslave)
%
%   svc1, svc2   - names of the two services that should be kept apart.
%   master       - svc1 or svc2 to name the service that survives when only
%                  one node is usable; any other value is treated as NULL,
%                  which lets both services share a node.
%   followslave  - if greater than 0, svc1 is restarted on the node that
%                  currently owns svc2 when svc1's node goes offline.
%
%   Reads the event globals event_type, node_id, node_state, service_name
%   and service_state, and acts through service_start()/service_stop().
%   Returns nothing.
%
define follow_service(svc1, svc2, master, followslave)
{
    variable owner_svc1, owner_svc2, state_svc1, state_svc2;
    variable nodes1, nodes2, allowed, colocated;

    debug("*** FOLLOW_SERVICE: follow_service(",svc1,", ",svc2,", ", master, ", ", followslave, ")");
    debug("*** FOLLOW_SERVICE: event_type: ", event_type, ", service_name: ", service_name, ", service_state: ", service_state);

    %
    % setup the master: anything that is not one of the two services
    % means "no master".
    %
    if ((master != svc1) and (master != svc2)) {
        debug("*** FOLLOW_SERVICE: master=NULL");
        master = NULL;
    }

    % get infos we need to decide further
    (owner_svc1, state_svc1) = service_status(svc1);
    (owner_svc2, state_svc2) = service_status(svc2);
    nodes1 = nodelist_online(svc1);
    nodes2 = nodelist_online(svc2);
    % Log the values already fetched instead of querying service_status() again.
    debug("*** FOLLOW_SERVICE: service_status(",svc1,"): ", owner_svc1, ", ", state_svc1);
    debug("*** FOLLOW_SERVICE: service_status(",svc2,"): ", owner_svc2, ", ", state_svc2);
    debug("*** FOLLOW_SERVICE: owner_svc1: ", owner_svc1, ", owner_svc2: ", owner_svc2, ", nodes1: ", nodes1, ", nodes2: ", nodes2);

    % Both services are owned by the same node.  The "> 0" test keeps two
    % unowned services (which report the same owner value) from being
    % treated as co-located.
    % NOTE(review): assumes owner values <= 0 mean "not running" - confirm
    % against service_status() in rgmanager.
    colocated = (owner_svc1 == owner_svc2) and (owner_svc1 > 0);

    if ((event_type == EVENT_NODE) and (owner_svc1 == node_id) and
        (node_state == NODE_OFFLINE) and (owner_svc2 >= 0)) {
        %
        % uh oh, the owner of svc1 died.  Restart it on the node
        % running svc2, or, if we should not follow svc2, start it
        % somewhere else.
        %
        if (followslave > 0) {
            % When svc2 is the master, svc1 is not started next to it.
            if (master != svc2) {
                () = service_start(svc1, owner_svc2);
            }
        } else {
            allowed = subtract(nodes1, owner_svc2);
            if (length(allowed) > 0) {
                () = service_start(svc1, allowed);
            } else if (master == svc1) {
                % Only svc2's node is left: svc1 takes it over.
                () = service_start(svc1, owner_svc2);
                () = service_stop(svc2);
            } else if (master == NULL) {
                % No master: both services may share the node.
                () = service_start(svc1, owner_svc2);
            }
        }
    }
    else if ((event_type == EVENT_NODE) and (owner_svc2 == node_id) and
             (node_state == NODE_OFFLINE) and (owner_svc1 >= 0)) {
        %
        % uh oh, the owner of svc2 died.  Restart it on any other
        % node but not the one running svc1.  If svc1's node is the
        % only one left, start it there only if master is svc2 (svc1
        % is then stopped) or if there is no master.
        %
        allowed = subtract(nodes2, owner_svc1);
        if (length(allowed) > 0) {
            () = service_start(svc2, allowed);
        } else if (master == svc2) {
            () = service_start(svc2, owner_svc1);
            () = service_stop(svc1);
        } else if (master == NULL) {
            () = service_start(svc2, owner_svc1);
        }
    }
    else if (colocated and
             (((event_type == EVENT_SERVICE) and (service_state == "started")) or
              (event_type == EVENT_CONFIG))) {
        %
        % A service was started, or the configuration changed, and both
        % services sit on the same node: move svc2 away if another node
        % is available, otherwise let the master decide who survives.
        %
        allowed = subtract(nodes2, owner_svc1);
        debug("*** FOLLOW SERVICE: service event started triggered.", allowed);
        if (length(allowed) > 0) {
            () = service_stop(svc2);
            () = service_start(svc2, allowed);
        } else if ((master == svc2) and (owner_svc2 > 0)) {
            debug("*** FOLLOW SERVICE: will stop service .", svc1);
            () = service_stop(svc1);
        } else if ((master == svc1) and (owner_svc1 > 0)) {
            debug("*** FOLLOW SERVICE: will stop service .", svc2);
            () = service_stop(svc2);
        } else {
            debug("*** FOLLOW SERVICE: both services running on the same node or only one is running.", allowed, ", ", master);
        }
    }
    return;
}

[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]