[Cluster-devel] conga luci/cluster/form-macros luci/homebase/v ...

rmccabe at sourceware.org
Mon Dec 11 23:58:02 UTC 2006


CVSROOT:	/cvs/cluster
Module name:	conga
Branch: 	RHEL5
Changes by:	rmccabe at sourceware.org	2006-12-11 23:58:01

Modified files:
	luci/cluster   : form-macros 
	luci/homebase  : validate_cluster_add.js 
	luci/site/luci/Extensions: cluster_adapters.py ricci_bridge.py 
	ricci/modules/cluster: ClusterConf.cpp 

Log message:
	Related: bz212021, bz218941, bz219156

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/cluster/form-macros.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.90.2.7&r2=1.90.2.8
http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/homebase/validate_cluster_add.js.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.4&r2=1.4.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/site/luci/Extensions/cluster_adapters.py.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.120.2.13&r2=1.120.2.14
http://sourceware.org/cgi-bin/cvsweb.cgi/conga/luci/site/luci/Extensions/ricci_bridge.py.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.30.2.11&r2=1.30.2.12
http://sourceware.org/cgi-bin/cvsweb.cgi/conga/ricci/modules/cluster/ClusterConf.cpp.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.8&r2=1.8.2.1

--- conga/luci/cluster/form-macros	2006/12/07 17:54:31	1.90.2.7
+++ conga/luci/cluster/form-macros	2006/12/11 23:57:59	1.90.2.8
@@ -1325,7 +1325,7 @@
 				<td>Hostname</td>
 				<td>
 					<input name="hostname" type="text"
-						tal:attributes="value cur_fencedev/hostname | nothing" />
+						tal:attributes="value cur_fencedev/hostname | cur_fencedev/ipaddr | nothing" />
 				</td>
 			</tr>
 			<tr>
@@ -2299,9 +2299,7 @@
 		global nodestatus python: here.getClusterStatus(request, ricci_agent);
 		global nodeinfo python: here.getNodeInfo(modelb, nodestatus, request);
 		global status_class python: 'node_' + (nodeinfo['nodestate'] == '0' and 'active' or (nodeinfo['nodestate'] == '1' and 'inactive' or 'unknown'));
-		global cluster_node_status_str python: (nodeinfo['nodestate'] == '0' and 'Cluster member' or (nodeinfo['nodestate'] == '1' and 'Currently not a cluster participant' or 'This node is not responding'));
-		global fenceinfo python: here.getFenceInfo(modelb, request);
-		global fencedevinfo python: here.getFencesInfo(modelb, request)"
+		global cluster_node_status_str python: (nodeinfo['nodestate'] == '0' and 'Cluster member' or (nodeinfo['nodestate'] == '1' and 'Currently not a cluster participant' or 'This node is not responding'))"
 	/>
 
 	 <table class="cluster node" width="100%">
@@ -2320,14 +2318,20 @@
 				<form method="post" onSubmit="return dropdown(this.gourl)">
 				<select name="gourl">
 					<option value="">Choose a Task...</option>
-					<option tal:attributes="value nodeinfo/jl_url">
+					<option tal:attributes="value nodeinfo/jl_url"
+						tal:condition="python: not 'ricci_error' in nodeinfo">
 						Have node <span tal:replace="python: nodeinfo['nodestate'] == '0' and 'leave' or 'join'" /> cluster
 					</option>
 					<option value="">----------</option>
 					<option tal:attributes="value nodeinfo/fence_url">Fence this node</option>
-					<option value="" tal:attributes="value nodeinfo/reboot_url">Reboot this node</option>
+					<option value="" tal:attributes="value nodeinfo/reboot_url"
+						tal:condition="python: not 'ricci_error' in nodeinfo">
+						Reboot this node
+					</option>
 					<option value="">----------</option>
-					<option tal:attributes="value nodeinfo/delete_url">Delete this node</option>
+					<option tal:attributes="value nodeinfo/delete_url"
+						tal:condition="python: not 'ricci_error' in nodeinfo">
+						Delete this node</option>
 				</select>
 				<input type="submit" value="Go"/>
 				</form>
@@ -2344,7 +2348,7 @@
 		</tr>
 
 		<tr class="cluster node info_bottom"
-			tal:condition="python: nodeinfo['nodestate'] == '0' or nodeinfo['nodestate'] == '1'">
+			tal:condition="python: (nodeinfo['nodestate'] == '0' or nodeinfo['nodestate'] == '1') and not 'ricci_error' in nodeinfo">
 			<td class="cluster node node_log" colspan="2">
 				<a class="cluster node"
 					tal:attributes="href nodeinfo/logurl" onClick="return popup_log(this, 'notes')">
@@ -2354,6 +2358,7 @@
 		</tr>
 	</table>
 
+	<tal:block tal:condition="python: not 'ricci_error' in nodeinfo">
 	<hr/>
 
 	<tal:block
@@ -2443,6 +2448,10 @@
 		<tal:block metal:use-macro="here/form-macros/macros/fence-form-list" />
 	</div>
 
+	<tal:block tal:define="
+		global fenceinfo python: here.getFenceInfo(modelb, request);
+		global fencedevinfo python: here.getFencesInfo(modelb, request)" />
+
 	<div class="invisible" id="shared_fence_devices">
 		<tal:block tal:repeat="cur_fencedev fencedevinfo/fencedevs">
 			<tal:block metal:use-macro="here/form-macros/macros/shared-fence-device-list" />
@@ -2494,36 +2503,67 @@
 		</tr>
 
 		<tr class="cluster node info_top fence">
-			<td class="cluster node fence_main fence"><div class="fence_container">
-				<div id="fence_list_level1">
-					<tal:comment tal:replace="nothing">
-						XXX - fill in any existing fence devices for this node
-						and update the counter number for this level
-					</tal:comment>
+			<td class="cluster node fence_main fence">
+				<div class="fence_container">
+				<div id="fence_list_level1" tal:define="global cur_fence_num python: 0">
+					<tal:block tal:condition="exists: fenceinfo/level1">
+						<tal:block tal:repeat="cur_fencedev fenceinfo/level1">
+							<tal:block tal:define="
+								cur_fence_type cur_fencedev/agent | nothing;
+								cur_fence_level python: 1;">
+								<div tal:attributes="id python: 'fence1_' + str(cur_fence_num)">
+									<tal:block
+										metal:use-macro="here/form-macros/macros/fencedev-cond-ladder" />
+								</div>
+							</tal:block>
+							<tal:block tal:define="global cur_fence_num python: cur_fence_num + 1" />
+						</tal:block>
+					</tal:block>
+					<tal:block
+						tal:replace="structure python: '<script type='+chr(0x22)+'text/javascript'+chr(0x22)+'>num_fences_level[0] = ' + str(cur_fence_num) + ';</script>'" />
 				</div>
 
 				<div class="fence_control">
 					<input type="button" value="Add a fence to this level"
 						onclick="add_node_fence_device(1);" />
 				</div>
-			</div></td>
+				</div>
+			</td>
 
-			<td class="cluster node fence_main fence"><div class="fence_container">
-				<div id="fence_list_level2">
-					<tal:comment tal:replace="nothing">
-						XXX - fill in any existing fence devices for this node
-						and update the counter number for this level
-					</tal:comment>
+			<td class="cluster node fence_main fence">
+				<div class="fence_container">
+				<div id="fence_list_level2" tal:define="global cur_fence_num python: 0">
+					<tal:block tal:condition="exists: fenceinfo/level2">
+						<tal:block tal:repeat="cur_fencedev fenceinfo/level2">
+							<tal:block tal:define="cur_fence_type cur_fencedev/agent | nothing">
+								<div tal:attributes="id python: 'fence2_' + str(cur_fence_num)">
+									<tal:block
+										metal:use-macro="here/form-macros/macros/fencedev-cond-ladder" />
+								</div>
+							</tal:block>
+							<tal:block tal:define="global cur_fence_num python: cur_fence_num + 1" />
+						</tal:block>
+					</tal:block>
+					<tal:block
+						tal:replace="structure python: '<script type='+chr(0x22)+'text/javascript'+chr(0x22)+'>num_fences_level[1] = ' + str(cur_fence_num) + ';</script>'" />
 				</div>
 
 				<div class="fence_control">
 					<input type="button" value="Add a fence to this level"
 						onclick="add_node_fence_device(2)" />
 				</div>
-			</div></td>
+				</div>
+			</td>
 		</tr>
 		</tbody>
 	</table>
+	</tal:block>
+	<tal:block tal:condition="python: 'ricci_error' in nodeinfo">
+		<hr/>
+		<strong class="errmsgs">
+			The ricci agent for this node is unresponsive. Node-specific information is not available at this time.
+		</strong>
+	</tal:block>
 </div>
 
 <div metal:define-macro="nodes-form">
@@ -2671,6 +2711,7 @@
 	<form name="adminform" action="" method="post">
 		<input name="numStorage" type="hidden" value="1" />
 		<input name="pagetype" type="hidden" value="15" />
+		<input name="addnode" type="hidden" value="1" />
 		<input type="hidden" name="clusterName"
 			tal:attributes="
 				value request/form/clusterName | request/clustername | nothing"
@@ -2740,11 +2781,19 @@
 
 <div metal:define-macro="nodeprocess-form">
 	<tal:block
-		tal:define="result python: here.nodeTaskProcess(modelb, request)"/>
+		tal:define="result python: here.nodeTaskProcess(modelb, request)">
 
-	<div>
-		<span tal:replace="result | nothing" />
-	</div>
+		<div id="errmsgsdiv" class="errmsgs"
+			tal:condition="python: result and len(result) > 1 and 'errors' in result[1]">
+            <p class="errmsgs">The following errors occurred:</p>
+
+            <ul class="errmsgs">
+                <tal:block tal:repeat="e python: result[1]['errors']">
+                    <li class="errmsgs" tal:content="python:e" />
+                </tal:block>
+            </ul>
+        </div>
+	</tal:block>
 </div>
 
 <div metal:define-macro="services-form">
@@ -3430,20 +3479,7 @@
 	</tal:block>
 </div>
 
-<div metal:define-macro="fencedev-form">
-	<h2>Fence Device Form</h2>
-
-	<div class="cluster fencedev">
-  <tal:block tal:define="
-		global cur_fencename request/fencename | nothing;
-		global cur_cluster request/clustername | nothing;
-		global cur_fence_type python: 'fence_apc'"/>
-
-  <span tal:condition="cur_fencename">
-    <span tal:define="global cur_fencedev python:here.getFence(modelb,request);
-                      global cur_fence_type cur_fencedev/agent"/>
-  </span>
-	
+<div metal:define-macro="fencedev-cond-ladder">
 	<tal:block tal:condition="python: cur_fence_type == 'fence_apc'">
 		<tal:block metal:use-macro="here/form-macros/macros/fence-form-apc" />
 	</tal:block>
@@ -3515,10 +3551,26 @@
 	<tal:block tal:condition="python: cur_fence_type == 'fence_manual'">
 		<tal:block metal:use-macro="here/form-macros/macros/fence-form-manual" />
 	</tal:block>
+</div>
 
-	<div class="fence_submit">
-		<input class="hbInput" type="button" value="Submit" name="Submit" />
-	</div>
+
+<div metal:define-macro="fencedev-form">
+	<h2>Fence Device Form</h2>
+
+	<div class="cluster fencedev">
+		<tal:block tal:define="
+			global cur_fencename request/fencename | nothing;
+			global cur_cluster request/clustername | nothing;
+			global cur_fence_type python: 'fence_apc'"/>
+
+		<span tal:condition="cur_fencename">
+			<span tal:define="
+					global cur_fencedev python:here.getFence(modelb,request);
+					global cur_fence_type cur_fencedev/agent" />
+		</span>
+	
+		<tal:block
+			metal:use-macro="here/form-macros/macros/fencedev-cond-ladder" />
 	</div>
 </div>
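
For reference, the reworked nodeprocess-form macro above expects
nodeTaskProcess() to return a (status, details) tuple on failure, with
the error strings under the 'errors' key of details (see the
cluster_adapters.py hunks below). A minimal sketch of that contract in
plain Python -- render_node_task_errors is an illustrative name, not
part of the commit:

# Sketch: consume the (status, details) failure tuple the way the
# nodeprocess-form macro does; only the 'errors' key is assumed here.
def render_node_task_errors(result):
    if result and len(result) > 1 and 'errors' in result[1]:
        for e in result[1]['errors']:
            print 'error: %s' % e

render_node_task_errors((False, {'errors': [ 'No node name was given.' ]}))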
 
--- conga/luci/homebase/validate_cluster_add.js	2006/09/27 22:49:09	1.4
+++ conga/luci/homebase/validate_cluster_add.js	2006/12/11 23:58:00	1.4.2.1
@@ -29,7 +29,13 @@
 	if (error_dialog(errors))
 		return (-1);
 
-	if (confirm('Add the cluster \"' + clusterName + '\" to the Luci management interface?'))
+	var confirm_str = '';
+	if (form.addnode)
+		confirm_str = 'Add node' + (added_storage.length > 1 ? 's' : '') + ' to the \"' + clusterName + '\" cluster?';
+	else
+		confirm_str = 'Add the cluster \"' + clusterName + '\" to the Luci management interface?';
+
+	if (confirm(confirm_str))
 		form.submit();
 
 	return (0);
--- conga/luci/site/luci/Extensions/cluster_adapters.py	2006/12/07 17:54:31	1.120.2.13
+++ conga/luci/site/luci/Extensions/cluster_adapters.py	2006/12/11 23:58:00	1.120.2.14
@@ -13,6 +13,7 @@
 from Clusterfs import Clusterfs
 from Fs import Fs
 from RefObject import RefObject
+from ClusterNode import ClusterNode
 from NFSClient import NFSClient
 from NFSExport import NFSExport
 from Service import Service
@@ -268,9 +269,8 @@
 				% (key, str(e)))
 
 def validateAddClusterNode(self, request):
-	errors = list()
-	messages = list()
 	requestResults = {}
+	errors = list()
 
 	try:
 		sessionData = request.SESSION.get('checkRet')
@@ -280,6 +280,7 @@
 	if 'clusterName' in request.form:
 		clusterName = str(request.form['clusterName'])
 	else:
+		luci_log.debug_verbose('vACN00: no cluster name was given')
 		return (False, {'errors': [ 'Cluster name is missing'], 'requestResults': requestResults })
 
 	rhn_dl = 1
@@ -301,8 +302,9 @@
 	try:
 		numStorage = int(request.form['numStorage'])
 		if numStorage < 1:
-			raise
-	except:
+			raise Exception, 'no nodes were added'
+	except Exception, e:
+		luci_log.debug_verbose('vACN0: %s: %s' % (clusterName, str(e)))
 		errors.append('You must specify at least one node to add to the cluster')
 		return (False, {'errors': [ errors ], 'requestResults': requestResults })
 
@@ -313,34 +315,56 @@
 	try:
 		nodeList = cluster_properties['nodeList']
 		if len(nodeList) < 1:
-			raise
-	except:
+			raise Exception, 'no cluster nodes'
+	except Exception, e:
+		luci_log.debug_verbose('vACN1: %s: %s' % (clusterName, str(e)))
 		errors.append('You must specify at least one valid node to add to the cluster')
 
+	clusterObj = None
 	try:
 		clusterObj = self.restrictedTraverse(PLONE_ROOT + '/systems/cluster/' + clusterName)
 		cluster_os = clusterObj.manage_getProperty('cluster_os')
 		if not cluster_os:
-			luci_log.debug('The cluster OS property is missing for cluster ' + clusterName)
-			raise Exception, 'no cluster OS was found.'
+			raise Exception, 'no cluster OS was found in DB for %s' % clusterName
+	except Exception, e:
+		luci_log.debug_verbose('vACN2: %s: %s' % (clusterName, str(e)))
 		try:
-			if len(filter(lambda x: x['os'] != cluster_os, nodeList)) > 0:
-				raise Exception, 'different operating systems were detected.'
-		except:
+			cluster_ricci = getRicciAgent(self, clusterName)
+			if not cluster_ricci:
+				raise Exception, 'cannot find a ricci agent for %s' % clusterName
+			cluster_os = getClusterOS(self, cluster_ricci)['os']
+			if clusterObj is None:
+				try:
+					clusterObj = self.restrictedTraverse(PLONE_ROOT + '/systems/cluster/' + clusterName)
+				except:
+					pass
+
+				try:
+					clusterObj.manage_addProperty('cluster_os', cluster_os, 'string')
+				except:
+					pass
+		except Exception, e:
+			luci_log.debug_verbose('vACN3: %s: %s' % (clusterName, str(e)))
 			nodeUnauth(nodeList)
+			cluster_os = None
 			cluster_properties['isComplete'] = False
-			errors.append('Cluster nodes must be running compatible operating systems.')
-	except:
+			errors.append('Unable to determine the cluster OS for the ' + clusterName + ' cluster.')
+
+	try:
+		if cluster_os is None:
+			raise Exception, 'no cluster OS found for %s' % clusterName
+		if len(filter(lambda x: x['os'] != cluster_os, nodeList)) > 0:
+			raise Exception, 'different operating systems were detected.'
+	except Exception, e:
+		luci_log.debug_verbose('vACN4: %s: %s' % (clusterName, str(e)))
 		nodeUnauth(nodeList)
 		cluster_properties['isComplete'] = False
-		errors.append('Unable to determine the cluster OS for the ' + clusterName + ' cluster.')
+		errors.append('Cluster nodes must be running compatible operating systems.')
 
 	if not cluster_properties['isComplete']:
 		return (False, {'errors': errors, 'requestResults': cluster_properties})
 
-	i = 0
-	while i < len(nodeList):
-		clunode = nodeList[i]
+	for clunode in nodeList:
 		try:
 			batchNode = addClusterNodeBatch(clunode['os'],
 							clusterName,
@@ -350,9 +374,11 @@
 							False,
 							rhn_dl)
 			if not batchNode:
-				raise
-			del nodeList[i]
-		except:
+				raise Exception, 'batchnode is None'
+			clunode['batchnode'] = batchNode
+		except Exception, e:
+			luci_log.debug_verbose('vACN5: node add for %s failed: %s' \
+				% (clunode['host'], str(e)))
 			clunode['errors'] = True
 			nodeUnauth(nodeList)
 			cluster_properties['isComplete'] = False
@@ -361,39 +387,99 @@
 	if not cluster_properties['isComplete']:
 		return (False, {'errors': errors, 'requestResults': cluster_properties})
 
+	try:
+		cluster_ricci = getRicciAgent(self, clusterName)
+		if not cluster_ricci:
+			raise Exception, 'Unable to get a ricci agent for %s' % clusterName
+	except Exception, e:
+		cluster_properties['isComplete'] = False
+		nodeUnauth(nodeList)
+		errors.append('Unable to contact a Ricci agent for %s.' % clusterName)
+		luci_log.debug_verbose('vACN6: ricci %s: %s' % (clusterName, str(e)))
+		return (False, {'errors': errors, 'requestResults': cluster_properties})
+
+	try:
+		model = getModelBuilder(None, cluster_ricci, cluster_ricci.dom0())
+		if not model:
+			raise Exception, 'unable to get model for %s' % clusterName
+		nodesptr = model.getClusterNodesPtr()
+		used_ids = {}
+		for i in model.getNodes():
+			try:
+				used_ids[int(i.getAttribute('nodeid'))] = 1
+			except Exception, e:
+				luci_log.debug_verbose('vACN7: %s' % str(e))
+				pass
+		next_node_id = 1
+		for i in nodeList:
+			next_node_id += 1
+			new_node = ClusterNode()
+			new_node.attr_hash['name'] = i['host']
+			new_node.attr_hash['votes'] = str(1)
+			while next_node_id in used_ids:
+				next_node_id += 1
+			new_node.attr_hash['nodeid'] = str(next_node_id)
+			nodesptr.addChild(new_node)
+
+		model.isModified = True
+		conf_str = str(model.exportModelAsString())
+		if not conf_str:
+			raise Exception, 'unable to export model as a string'
+		batch_number, result = setClusterConf(cluster_ricci, conf_str)
+
+		while True:
+			batch_ret = checkBatch(cluster_ricci, batch_number)
+			code = batch_ret[0]
+			if code == True:
+				break
+			if code == -1:
+				errors.append(batch_ret[1])
+				raise Exception, batch_ret[1]
+			if code == False:
+				time.sleep(0.5)
+	except Exception, e:
+		luci_log.debug_verbose('vACN8: %s' % str(e))
+		errors.append('Unable to update the cluster node list for %s' % clusterName)
+		return (False, {'errors': errors, 'requestResults': cluster_properties})
+
 	error = createClusterSystems(self, clusterName, nodeList)
 	if error:
+		luci_log.debug_verbose('vACN9: %s: %s' % (clusterName, str(error)))
 		nodeUnauth(nodeList)
 		cluster_properties['isComplete'] = False
 		errors.append(error)
 		return (False, {'errors': errors, 'requestResults': cluster_properties})
 
 	batch_id_map = {}
-	for i in nodeList:
-		clunode = nodeList[i]
+	for clunode in nodeList:
 		success = True
 		try:
 			rc = RicciCommunicator(clunode['host'])
+			if not rc:
+				raise Exception, 'rc is None'
 		except Exception, e:
-			luci_log.info('Unable to connect to the ricci daemon on host %s: %s'% (clunode['host'], str(e)))
+			nodeUnauth([clunode['host']])
 			success = False
+			luci_log.info('vACN10: Unable to connect to the ricci daemon on host %s: %s' % (clunode['host'], str(e)))
 
 		if success:
 			try:
-				resultNode = rc.process_batch(batchNode, async=True)
+				resultNode = rc.process_batch(clunode['batchnode'], async=True)
 				batch_id_map[clunode['host']] = resultNode.getAttribute('batch_id')
-			except:
+			except Exception, e:
+				nodeUnauth([clunode['host']])
 				success = False
+				luci_log.info('vACN11: %s: %s' % (clunode['host'], str(e)))
 
 		if not success:
-			nodeUnauth(nodeList)
 			cluster_properties['isComplete'] = False
 			errors.append('An error occurred while attempting to add cluster node \"' + clunode['host'] + '\"')
-			return (False, {'errors': errors, 'requestResults': cluster_properties})
 
-	messages.append('Cluster join initiated for host \"' + clunode['host'] + '\"')
 	buildClusterCreateFlags(self, batch_id_map, clusterName)
 
+	if len(errors) > 0:
+		return (False, {'errors': errors, 'requestResults': cluster_properties})
+
 	response = request.RESPONSE
 	response.redirect(request['URL'] + "?pagetype=" + CLUSTER_CONFIG + "&clustername=" + clusterName + '&busyfirst=true')
 
@@ -2575,9 +2661,7 @@
   if len(clulist) < 1:
     return {}
   clu = clulist[0]
-  cluerror = False
   if 'error' in clu:
-    cluerror = True
     map['error'] = True
   clustername = clu['name']
   if clu['alias'] != "":
@@ -2615,7 +2699,6 @@
   map['currentservices'] = svc_dict_list
   node_dict_list = list()
   for item in nodelist:
-    node_error = 'error' in item
     nmap = {}
     name = item['name']
     nmap['nodename'] = name
@@ -2947,30 +3030,30 @@
 def nodeTaskProcess(self, model, request):
 	try:
 		clustername = request['clustername']
-	except KeyError, e:
+	except:
 		try:
 			clustername = request.form['clustername']
 		except:
-			luci_log.debug('missing cluster name for NTP')
-			return None
+			luci_log.debug('NTP0: missing cluster name')
+			return (False, {'errors': [ 'No cluster name was given.' ]})
 
 	try:
 		nodename = request['nodename']
-	except KeyError, e:
+	except:
 		try:
 			nodename = request.form['nodename']
 		except:
-			luci_log.debug('missing nodename name for NTP')
-			return None
+			luci_log.debug('NTP1: missing node name')
+			return (False, {'errors': [ 'No node name was given.' ]})
 
 	try:
 		task = request['task']
-	except KeyError, e:
+	except:
 		try:
 			task = request.form['task']
 		except:
-			luci_log.debug('missing task for NTP')
-			return None
+			luci_log.debug('NTP2: missing task')
+			return (False, {'errors': [ 'No node task was given.' ]})
 
 	nodename_resolved = resolve_nodename(self, clustername, nodename)
 
@@ -2980,24 +3063,27 @@
 		# to be performed.
 		try:
 			rc = RicciCommunicator(nodename_resolved)
+			if not rc:
+				raise Exception, 'rc is None'
 		except RicciError, e:
-			luci_log.debug('ricci error from %s: %s' \
+			luci_log.debug('NTP3: ricci error from %s: %s' \
 				% (nodename_resolved, str(e)))
-			return None
+			return (False, {'errors': [ 'Unable to connect to the ricci agent on %s.' % nodename_resolved ]})
 		except:
-			return None
+			luci_log.debug('NTP4: unexpected ricci error from %s' \
+				% nodename_resolved)
+			return (False, {'errors': [ 'Unable to connect to the ricci agent on %s.' % nodename_resolved ]})
 
 		cluinfo = rc.cluster_info()
 		if not cluinfo[0] and not cluinfo[1]:
-			luci_log.debug('host %s not in a cluster (expected %s)' \
+			luci_log.debug('NTP5: node %s not in a cluster (expected %s)' \
 				% (nodename_resolved, clustername))
-			return None
+			return (False, {'errors': [ 'Node %s reports it is not in a cluster.' % nodename_resolved ]})
 
 		cname = lower(clustername)
 		if cname != lower(cluinfo[0]) and cname != lower(cluinfo[1]):
-			luci_log.debug('host %s in unknown cluster %s:%s (expected %s)' \
-				% (nodename_resolved, cluinfo[0], cluinfo[1], clustername))
-			return None
+			luci_log.debug('NTP6: node %s in unknown cluster %s:%s (expected %s)' % (nodename_resolved, cluinfo[0], cluinfo[1], clustername))
+			return (False, {'errors': [ 'Node %s reports it is in cluster \"%s\". We expect it to be a member of cluster \"%s\".' % (nodename_resolved, cluinfo[0], clustername) ]})
 
 		if not rc.authed():
 			rc = None
@@ -3016,40 +3102,45 @@
 				pass
 
 		if rc is None:
-			return None
+			luci_log.debug('NTP7: node %s is not authenticated' \
+				% nodename_resolved)
+			return (False, {'errors': [ 'Node %s is not authenticated' % nodename_resolved ]})
 
 	if task == NODE_LEAVE_CLUSTER:
 		if nodeLeave(self, rc, clustername, nodename_resolved) is None:
-			luci_log.debug_verbose('NTP: nodeLeave failed')
-			return None
+			luci_log.debug_verbose('NTP8: nodeLeave failed')
+			return (False, {'errors': [ 'Node %s failed to leave cluster %s' % (nodename_resolved, clustername) ]})
 
 		response = request.RESPONSE
 		response.redirect(request['URL'] + "?pagetype=" + NODES + "&clustername=" + clustername + '&busyfirst=true')
 	elif task == NODE_JOIN_CLUSTER:
 		if nodeJoin(self, rc, clustername, nodename_resolved) is None:
-			luci_log.debug_verbose('NTP: nodeJoin failed')
-			return None
+			luci_log.debug_verbose('NTP9: nodeJoin failed')
+			return (False, {'errors': [ 'Node %s failed to join cluster %s' % (nodename_resolved, clustername) ]})
 
 		response = request.RESPONSE
 		response.redirect(request['URL'] + "?pagetype=" + NODES + "&clustername=" + clustername + '&busyfirst=true')
 	elif task == NODE_REBOOT:
 		if forceNodeReboot(self, rc, clustername, nodename_resolved) is None:
-			luci_log.debug_verbose('NTP: nodeReboot failed')
-			return None
+			luci_log.debug_verbose('NTP10: nodeReboot failed')
+			return (False, {'errors': [ 'Node %s failed to reboot' \
+				% nodename_resolved ]})
 
 		response = request.RESPONSE
 		response.redirect(request['URL'] + "?pagetype=" + NODES + "&clustername=" + clustername + '&busyfirst=true')
 	elif task == NODE_FENCE:
 		if forceNodeFence(self, clustername, nodename, nodename_resolved) is None:
-			luci_log.debug_verbose('NTP: nodeFencefailed')
-			return None
+			luci_log.debug_verbose('NTP11: nodeFence failed')
+			return (False, {'errors': [ 'Fencing of node %s failed.' \
+				% nodename_resolved]})
 
 		response = request.RESPONSE
 		response.redirect(request['URL'] + "?pagetype=" + NODES + "&clustername=" + clustername + '&busyfirst=true')
 	elif task == NODE_DELETE:
 		if nodeDelete(self, rc, model, clustername, nodename, nodename_resolved) is None:
-			luci_log.debug_verbose('NTP: nodeDelete failed')
-			return None
+			luci_log.debug_verbose('NTP12: nodeDelete failed')
+			return (False, {'errors': [ 'Deletion of node %s from cluster %s failed.' % (nodename_resolved, clustername) ]})
+
 		response = request.RESPONSE
 		response.redirect(request['URL'] + "?pagetype=" + NODES + "&clustername=" + clustername + '&busyfirst=true')
 
@@ -3144,6 +3235,7 @@
         raise Exception, 'rc is none'
     except Exception, e:
       rc = None
+      infohash['ricci_error'] = True
       luci_log.info('Error connecting to %s: %s' \
           % (nodename_resolved, str(e)))
 
@@ -3155,6 +3247,8 @@
       dlist.append("rgmanager")
       states = getDaemonStates(rc, dlist)
       infohash['d_states'] = states
+  else:
+    infohash['ricci_error'] = True
 
   infohash['logurl'] = '/luci/logs/?nodename=' + nodename_resolved + '&clustername=' + clustername
   return infohash
@@ -3246,7 +3340,12 @@
 
     map['currentservices'] = svc_dict_list
     #next is faildoms
-    fdoms = model.getFailoverDomainsForNode(name)
+
+    if model:
+      fdoms = model.getFailoverDomainsForNode(name)
+    else:
+      map['ricci_error'] = True
+      fdoms = list()
     fdom_dict_list = list()
     for fdom in fdoms:
       fdom_dict = {}
@@ -3976,19 +4075,26 @@
         rc = RicciCommunicator(ricci[0])
       except Exception, e:
         rc = None
-        finished = False
+        finished = -1
+        err_msg = ''
         luci_log.debug_verbose('ICB15: ricci error: %s: %s' \
           % (ricci[0], str(e)))
 
       if rc is not None:
-        finished = checkBatch(rc, item[1].getProperty(BATCH_ID))
-
-      if finished == True:
+        batch_res = checkBatch(rc, item[1].getProperty(BATCH_ID))
+        finished = batch_res[0]
+        err_msg = batch_res[1]
+
+      if finished == True or finished == -1:
+        if finished == -1:
+          flag_msg = err_msg
+        else:
+          flag_msg = ''
         flag_desc = item[1].getProperty(FLAG_DESC)
         if flag_desc is None:
-          node_report['desc'] = REDIRECT_MSG
+          node_report['desc'] = flag_msg + REDIRECT_MSG
         else:
-          node_report['desc'] = flag_desc + REDIRECT_MSG
+          node_report['desc'] = flag_msg + flag_desc + REDIRECT_MSG
         nodereports.append(node_report)
         try:
             clusterfolder.manage_delObjects(item[0])
@@ -5066,8 +5172,11 @@
 				pass
 			luci_log.info('NNFP2: %s not authenticated' % item[0])
 
-		finished = checkBatch(rc, item[1].getProperty(BATCH_ID))
-		if finished == True:
+		batch_ret = checkBatch(rc, item[1].getProperty(BATCH_ID))
+		finished = batch_ret[0]
+		if finished == True or finished == -1:
+			if finished == -1:
+				luci_log.debug_verbose('NNFP2: batch error: %s' % batch_ret[1])
 			try:
 				nodefolder.manage_delObjects(item[0])
 			except Exception, e:
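
The node-id assignment added to validateAddClusterNode above records
the ids already present in cluster.conf and then hands each new node
the next unused integer (candidate ids only ever grow, so nothing has
to be re-marked as used). A standalone sketch of that allocation
logic -- the function name and plain-list inputs are illustrative;
ClusterNode and the model builder are left out:

# Sketch: mirror the used_ids/next_node_id allocation from
# validateAddClusterNode, starting the search at 2 as the loop above does.
def allocate_node_ids(existing_ids, new_hosts):
    used_ids = {}
    for i in existing_ids:
        used_ids[int(i)] = 1
    assigned = {}
    next_node_id = 1
    for host in new_hosts:
        next_node_id += 1
        while next_node_id in used_ids:
            next_node_id += 1
        assigned[host] = next_node_id
    return assigned

# existing ids 1-3, two new nodes -> {'node4': 4, 'node5': 5}
print allocate_node_ids([ 1, 2, 3 ], [ 'node4', 'node5' ])
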
--- conga/luci/site/luci/Extensions/ricci_bridge.py	2006/12/07 17:54:31	1.30.2.11
+++ conga/luci/site/luci/Extensions/ricci_bridge.py	2006/12/11 23:58:00	1.30.2.12
@@ -1,7 +1,7 @@
 import xml
 from time import time, ctime
 from xml.dom import minidom
-from ricci_communicator import RicciCommunicator
+from ricci_communicator import RicciCommunicator, extract_module_status
 from LuciSyslog import LuciSyslog
 
 try:
@@ -10,23 +10,37 @@
 	pass
 
 def checkBatch(rc, batch_id):
+	err_msg = 'An unknown Ricci error occurred on %s' % rc.hostname()
+
 	try:
 		batch = rc.batch_report(batch_id)
 		if batch is None:
-			return True
-	except:
-		return False
+			return (True, 'batch id was not found')
+	except Exception, e:
+		luci_log.debug_verbose('checkBatch0: %s: %s' % (rc.hostname(), str(e)))
+		return (-1, err_msg)
 
 	try:
-		dummy = batch.getAttribute('batch_id')
-		result = batch.getAttribute('status')
-	except:
-		return False
-
-	if result == '0':
-		return True
+		code, new_err_msg = extract_module_status(batch, 1)
+		if new_err_msg:
+			err_msg = 'A Ricci error occurred on %s: %s' \
+				% (rc.hostname(), str(new_err_msg))
+	except Exception, e:
+		luci_log.debug_verbose('checkBatch1: %s: %s' % (rc.hostname(), str(e)))
+		return (-1, err_msg)
 
-	return False
+	# In progress.
+	if code == -101 or code == -102:
+		return (False, 'in progress')
+
+	# Done successfully.
+	if code == 0:
+		return (True, 'completed successfully')
+
+	# Error
+	luci_log.debug_verbose('checkBatch2: %s: %s: %s' \
+		% (str(code), rc.hostname(), err_msg))
+	return (-1, err_msg)
 
 def addClusterNodeBatch(os_str,
 						cluster_name,
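
checkBatch() now returns a (code, message) pair instead of a bare
boolean: True means the batch completed, False means it is still in
progress, and -1 reports a ricci error whose text is in the message.
A minimal sketch of the polling loop callers use, mirroring the one
added to validateAddClusterNode above (wait_for_batch is an
illustrative wrapper; rc is a RicciCommunicator as in the diffs):

import time

def wait_for_batch(rc, batch_id):
    # Poll until the batch finishes; propagate ricci errors to the caller.
    while True:
        code, err_msg = checkBatch(rc, batch_id)
        if code == True:
            return True
        if code == -1:
            raise Exception, err_msg
        # code == False: still in progress
        time.sleep(0.5)
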
--- conga/ricci/modules/cluster/ClusterConf.cpp	2006/10/06 03:10:13	1.8
+++ conga/ricci/modules/cluster/ClusterConf.cpp	2006/12/11 23:58:01	1.8.2.1
@@ -85,6 +85,7 @@
   fstream fs(tmp_path.c_str(), 
 	     ios_base::out | ios_base::trunc);
   fs << generateXML(xml);
+  fs.close();
   
   // propagate/move to proper place
   if (propagate) {
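
The ClusterConf.cpp fix closes the temporary file before it is
propagated, so buffered output is flushed to disk before the file is
moved into its proper place. The same write-close-then-move pattern
sketched in Python (the function and path arguments are illustrative):

import os

def write_conf(tmp_path, final_path, conf_xml):
    f = open(tmp_path, 'w')
    f.write(conf_xml)
    # Close (and thereby flush) before the rename; skipping this step
    # is the bug the fs.close() call above fixes.
    f.close()
    os.rename(tmp_path, final_path)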



