[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

[Cluster-devel] [fence-virt PATCH] backend plugin for Pacemaker



Hi,

I created the backend "pm-fence", which meets the following
configuration requirements.
- Both hosts and VMs, cluster (Pacemaker) have been configured.
  * This backend has STONITH (fencing) function of the VM cluster.
- The VMs are managed as resources (RA is VirtualDomain) by the host cluster.
  * In this configuration, in order to stop a VM,
    it is necessary to stop the resource through the host cluster.
    When the VM is stopped without going through the host cluster
    (e.g., by running 'virsh destroy'), the host cluster considers
    the VM resource to have failed.

Here's an overview of the function. Please refer to the attached 'overview.png'.
(1) pacemaker detects failure of resource.
(2) STONITH (STONITH plugin is vm-stonith) is executed, target is srv-B.
(3) vm-stonith requests 'fencing' to a host with fence_virt.
(4) use the serial listener.
(5~7) fence_virtd (pm-fence backend) requests 'forced stop' of VM
    resource (that is, srv-B) to pacemaker.
(8) destroy the srv-B.

Here's a description of the attached files.
* pm-fence_backend_for_pacemaker.patch
  - add the server/pm-fence.c
  - change the configure.in and server/Makefile.in
* VirtualDomain
  - Resource Agent for domains managed.
  - I'm going to post this RA to the appropriate community
    (https://github.com/ClusterLabs/resource-agents).
* vm-stonith
  - STONITH plugin (External STONITH module) for VM.
  - I'm going to post this plugin to the appropriate community
    (http://hg.linux-ha.org/glue/).
* overview.png
  - figure of the overview.
* fence_virt.conf
  - sample configuration.
* host.cli, mon-host.txt
  - sample configuration file of a host cluster,
    and mon-host.txt is an output of the crm_mon command.
* VM.cli, mon-VM.txt
  - sample configuration file of a VM cluster,
    and mon-VM.txt is an output of the crm_mon command.

Best Regards,
diff -urN fence-virt-200eab4/configure.in mod/configure.in
--- fence-virt-200eab4/configure.in	2011-08-11 09:53:47.000000000 +0900
+++ mod/configure.in	2011-09-20 10:22:50.222506615 +0900
@@ -26,6 +26,8 @@
 AC_CHECK_LIB([virt], [virConnectOpen])
 AC_CHECK_LIB([xml2], [main])
 
+AC_SEARCH_LIBS([read_attr_delegate], [cib], [], [ pm_ver=1.0 ])
+AC_SUBST(pm_ver)
 
 # Checks for header files.
 AC_HEADER_DIRENT
@@ -92,6 +94,13 @@
 [ mod_libvirt_qpid=$enableval ], [ mod_libvirt_qpid=yes ])
 AC_SUBST(mod_libvirt_qpid)
 
+# pm-fence plugin: Disabled by default
+AC_ARG_ENABLE(pm-fence-plugin,
+[AS_HELP_STRING([--enable-pm-fence-plugin],
+		[Enable pm-fence backend plugin])],
+[ mod_pm_fence=$enableval ], [ mod_pm_fence=no ])
+AC_SUBST(mod_pm_fence)
+
 # multicast plugin: Enabled by default
 AC_ARG_ENABLE(multicast-plugin,
 [AS_HELP_STRING([--disable-multicast-plugin],
diff -urN fence-virt-200eab4/server/Makefile.in mod/server/Makefile.in
--- fence-virt-200eab4/server/Makefile.in	2011-08-11 09:53:47.000000000 +0900
+++ mod/server/Makefile.in	2011-09-20 10:22:50.223506252 +0900
@@ -22,6 +22,8 @@
 MAIN_LIBS=-L../config -lsimpleconfig -ldl
 AIS_LIBS=-L/usr/lib64/openais -lSaCkpt
 COROSYNC_LIBS=-L/usr/lib64/corosync -lcpg
+PACEMAKER_LIBS=-lcib -lpe_status -lncurses
+PACEMAKER_INCLUDES=-I/usr/include/glib-2.0 -I$(libdir)/glib-2.0/include -I/usr/include/pacemaker -I/usr/include/heartbeat
 CMAN_LIBS=-lcman
 VIRT_LIBS=-lvirt
 VIRT_QPID=-lqmf2 -lqpidclient -lqpidtypes -lqpidcommon -lqpidmessaging
@@ -42,6 +44,7 @@
 null_so_SOURCES = null.c
 libvirt_qpid_so_SOURCES = uuid-test.c
 libvirt_qpid_cxx_so_SOURCES = libvirt-qpid.cpp
+pm_fence_so_SOURCES = pm-fence.c
 multicast_so_SOURCES = mcast.c history.c
 checkpoint_so_SOURCES = virt.c vm_states.c history.c checkpoint.c cpg.c
 serial_so_SOURCES = virt-serial.c virt-sockets.c serial.c history.c
@@ -54,6 +57,7 @@
 mod_libvirt= mod_libvirt@
 mod_checkpoint= mod_checkpoint@
 mod_libvirt_qpid= mod_libvirt_qpid@
+mod_pm_fence= mod_pm_fence@
 mod_multicast= mod_multicast@
 mod_serial= mod_serial@
 
@@ -71,6 +75,9 @@
 ifneq ($(mod_libvirt_qpid),no)
 MODULES+=libvirt-qpid.so
 endif
+ifneq ($(mod_pm_fence),no)
+MODULES+=pm-fence.so
+endif
 ifneq ($(mod_multicast),no)
 MODULES+=multicast.so
 endif
@@ -100,6 +107,10 @@
 fence_virtd_cxx_SOURCES+=${libvirt_qpid_cxx_so_SOURCES}
 LIBS+=$(VIRT_QPID)
 endif
+ifneq ($(mod_pm_fence),no)
+fence_virtd_SOURCES+=${pm_fence_so_SOURCES}
+LIBS+=$(PACEMAKER_LIBS)
+endif
 ifneq ($(mod_multicast),no)
 fence_virtd_SOURCES+=${multicast_so_SOURCES}
 LIBS+=$(AIS_LIBS) $(NSS_LIBS)
@@ -116,6 +127,11 @@
 
 CFLAGS+=-DSYSCONFDIR=\"@sysconfdir \"
 
+pm_ver= pm_ver@
+ifeq ($(pm_ver),1.0)
+CFLAGS+=-DPM_1_0
+endif
+
 all: ${TARGETS} ${MODULES}
 
 fence_virtd: ${fence_virtd_SOURCES:.c=.o} ${fence_virtd_cxx_SOURCES:.cpp=.opp}
@@ -130,6 +146,9 @@
 libvirt-qpid.so: ${libvirt_qpid_so_SOURCES:.c=.o} ${libvirt_qpid_cxx_so_SOURCES:.cpp=.opp}
 	$(CXX) -o $@ $^ $(LIBS) -shared $(VIRT_QPID)
 
+pm-fence.so: ${pm_fence_so_SOURCES:.c=.o}
+	$(CC) -o $@ $^ $(LIBS) -shared $(PACEMAKER_LIBS)
+
 null.so: ${null_so_SOURCES:.c=.o}
 	$(CC) -o $@ $^ $(LIBS) -shared
 
@@ -143,6 +162,9 @@
 %.o: %.c
 	$(CC) $(CFLAGS) -c -o $@ $^ $(INCLUDES)
 
+pm-fence.o: pm-fence.c
+	$(CC) $(CFLAGS) -c -o $@ $^ $(INCLUDES) $(PACEMAKER_INCLUDES)
+
 %.opp: %.cpp
 	$(CXX) $(CFLAGS) -c -o $@ $^ $(INCLUDES)
 
diff -urN fence-virt-200eab4/server/pm-fence.c mod/server/pm-fence.c
--- fence-virt-200eab4/server/pm-fence.c	1970-01-01 09:00:00.000000000 +0900
+++ mod/server/pm-fence.c	2011-09-20 10:22:50.223506252 +0900
@@ -0,0 +1,694 @@
+/*
+  Copyright Red Hat, Inc. 2006
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms of the GNU General Public License as published by the
+  Free Software Foundation; either version 2, or (at your option) any
+  later version.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; see the file COPYING.  If not, write to the
+  Free Software Foundation, Inc.,  675 Mass Ave, Cambridge, 
+  MA 02139, USA.
+*/
+#include <stdio.h>
+#include <netdb.h>
+#include <errno.h>
+#include <syslog.h>
+#include <simpleconfig.h>
+#include <static_map.h>
+
+#include <server_plugin.h>
+
+#include <crm/cib.h>
+#include <crm/pengine/status.h>
+
+/* Local includes */
+#include "xvm.h"
+#include "debug.h"
+
+
+#define BACKEND_NAME "pm-fence"
+#define VERSION "0.1"
+
+#define MAGIC 0x1e0d197a
+
+#define ATTR_NAME_PREFIX "force_stop-"
+#define ATTR_VALUE "true"
+#define READ_CIB_RETRY 30
+
+enum rsc_status {
+	RS_STARTED = 1,
+	RS_STOPPED,
+	RS_UNDEFINED,
+	RS_GETERROR
+};
+
+struct pf_info {
+	int magic;
+	cib_t *cib;
+	unsigned int loglevel;
+};
+cib_t **cib = NULL;
+pe_working_set_t data_set;
+
+#define VALIDATE(arg) \
+do { \
+	if (!arg || ((struct pf_info *)arg)->magic != MAGIC) { \
+		errno = EINVAL; \
+		return -1; \
+	} \
+} while(0)
+
+
+static void
+free_dataset(void)
+{
+	dbg_printf(5, "%s\n", __FUNCTION__);
+
+	if (!data_set.input)
+		return;
+	free_xml(data_set.input);
+	data_set.input = NULL;
+	cleanup_calculations(&data_set);
+	memset(&data_set, 0, sizeof(pe_working_set_t));
+}
+
+static void
+disconnect_cib(void)
+{
+	dbg_printf(5, "%s\n", __FUNCTION__);
+
+	if (*cib) {
+		(*cib)->cmds->signoff(*cib);
+		cib_delete(*cib);
+		*cib = NULL;
+	}
+	free_dataset();
+}
+
+static gboolean
+connect_cib(void)
+{
+	enum cib_errors rc = cib_ok;
+	int i;
+
+	dbg_printf(5, "%s\n", __FUNCTION__);
+
+	if (*cib)
+		return TRUE;
+	memset(&data_set, 0, sizeof(pe_working_set_t));
+
+	*cib = cib_new();
+	if (!*cib) {
+		syslog(LOG_NOTICE, "cib connection initialization failed\n");
+		printf("cib connection initialization failed\n");
+		return FALSE;
+	}
+	for (i = 1; i <= 20; i++) {
+		if (i) sleep(1);
+		dbg_printf(4, "%s: connect to cib attempt: %d\n", __FUNCTION__, i);
+		rc = (*cib)->cmds->signon(*cib, crm_system_name, cib_command);
+		if (rc == cib_ok)
+			break;
+	}
+	if (rc != cib_ok) {
+		syslog(LOG_NOTICE,
+			"failed to signon to cib: %s\n", cib_error2string(rc));
+		printf("failed to signon to cib: %s\n", cib_error2string(rc));
+		disconnect_cib();
+		return FALSE;
+	}
+	dbg_printf(3, "%s: succeed at connect to cib\n", __FUNCTION__);
+	return TRUE;
+}
+
+static gboolean
+get_dataset(void)
+{
+	xmlNode *current_cib;
+	unsigned int loglevel;
+
+	dbg_printf(5, "%s\n", __FUNCTION__);
+
+	free_dataset();
+	current_cib = get_cib_copy(*cib);
+	if (!current_cib)
+		return FALSE;
+	set_working_set_defaults(&data_set);
+	data_set.input = current_cib;
+	data_set.now = new_ha_date(TRUE);
+
+	/* log output of the level below LOG_ERR is deterred */
+	loglevel = get_crm_log_level();
+	set_crm_log_level(LOG_ERR);
+	cluster_status(&data_set);
+	set_crm_log_level(loglevel);
+	return TRUE;
+}
+
+static enum rsc_status
+get_rsc_status(const char *rid, char **node, char **uuid)
+{
+	GListPtr gIter = NULL, gIter2 = NULL;
+	resource_t *rsc;
+
+	dbg_printf(5, "%s: Resource %s\n", __FUNCTION__, rid);
+
+	if (!rid || connect_cib() == FALSE)
+		return RS_GETERROR;
+	if (get_dataset() == FALSE) {
+		disconnect_cib();
+		if (connect_cib() == FALSE || get_dataset() == FALSE)
+			return RS_GETERROR;
+	}
+
+	/* find out from RUNNING resources */
+	gIter = data_set.nodes;
+	for(; gIter; gIter = gIter->next) {
+		node_t *node2 = (node_t*)gIter->data;
+
+		gIter2 = node2->details->running_rsc;
+		for(; gIter2; gIter2 = gIter2->next) {
+			resource_t *rsc2 = (resource_t*)gIter2->data;
+
+			dbg_printf(3, "%s: started resource [%s]\n",
+				__FUNCTION__, rsc2->id);
+			if (safe_str_eq(rid, rsc2->id)) {
+				if (node && !*node) {
+					*node = crm_strdup(node2->details->uname);
+					*uuid = crm_strdup(node2->details->id);
+					dbg_printf(3, "%s: started node [%s(%s)]\n",
+						__FUNCTION__, *node, *uuid);
+				}
+				return RS_STARTED;
+			}
+		}
+	}
+
+	/* find out from ALL resources */
+	rsc = pe_find_resource(data_set.resources, rid);
+	if (rsc) {
+		dbg_printf(3, "%s: stopped resource [%s]\n", __FUNCTION__, rsc->id);
+		return RS_STOPPED;
+	}
+	return RS_UNDEFINED;
+}
+
+/*
+ * The cluster node attribute is updated for RA which controls a virtual machine.
+ */
+static gboolean
+update_status_attr(char cmd, const char *rid,
+	const char *node, const char *uuid, gboolean confirm)
+{
+	char *name = g_strdup_printf("%s%s", ATTR_NAME_PREFIX, rid);
+	char *value;
+	gboolean ret = FALSE;
+
+	dbg_printf(5, "%s\n", __FUNCTION__);
+
+	switch (cmd) {
+	case 'U':
+		value = ATTR_VALUE;
+		break;
+	case 'D':
+		value = NULL;
+		break;
+	default:
+		goto out;
+	}
+	dbg_printf(1, "%s: Update attribute %s=%s for %s\n",
+		__FUNCTION__, name, value, node);
+
+	ret = attrd_lazy_update(cmd, node,
+		name, value, XML_CIB_TAG_STATUS, NULL, NULL);
+	if (confirm == FALSE)
+		goto out;
+	if (ret == TRUE) {
+		enum cib_errors rc;
+		int i;
+		ret = FALSE; value = NULL;
+		for (i = 1; i <= READ_CIB_RETRY; i++) {
+			dbg_printf(4, "%s: waiting..[%d]\n", __FUNCTION__, i);
+			sleep(1);
+#ifdef PM_1_0
+			rc = read_attr(*cib, XML_CIB_TAG_STATUS,
+				uuid, NULL, NULL, name, &value, FALSE);
+#else
+			rc = read_attr(*cib, XML_CIB_TAG_STATUS,
+				uuid, NULL, NULL, NULL, name, &value, FALSE);
+#endif
+			dbg_printf(3, "%s: cmd=%c, rc=%d, value=%s\n",
+				__FUNCTION__, cmd, rc, value);
+			if (rc == cib_ok) {
+				if (cmd == 'U' && !g_strcmp0(value, ATTR_VALUE)) {
+					ret = TRUE;
+					break;
+				}
+			} else if (rc == cib_NOTEXISTS) {
+				if (cmd == 'D') {
+					ret = TRUE;
+					break;
+				}
+			} else {
+				break;
+			}
+			crm_free(value);
+		}
+		crm_free(value);
+	}
+out:
+	crm_free(name);
+	return ret;
+}
+
+/*
+ * ref. pacemaker/tools/crm_resource.c
+ */
+static enum cib_errors
+find_meta_attr(const char *rid, const char *name, char **id)
+{
+	char *xpath;
+	xmlNode *xml = NULL;
+	const char *p;
+	enum cib_errors rc;
+
+	dbg_printf(5, "%s\n", __FUNCTION__);
+
+	xpath = g_strdup_printf("%s/*[ id=\"%s\"]/%s/nvpair[ name=\"%s\"]",
+		get_object_path("resources"), rid, XML_TAG_META_SETS, name);
+	dbg_printf(3, "%s: query=%s\n", __FUNCTION__, xpath);
+
+	rc = (*cib)->cmds->query(*cib, xpath, &xml,
+		cib_sync_call|cib_scope_local|cib_xpath);
+	if (rc != cib_ok) {
+		if (rc != cib_NOTEXISTS) {
+			syslog(LOG_NOTICE, "failed to query to cib: %s\n",
+				cib_error2string(rc));
+			printf("failed to query to cib: %s\n",
+				cib_error2string(rc));
+		}
+		crm_free(xpath);
+		return rc;
+	}
+	crm_log_xml_debug(xml, "Match");
+
+	p = crm_element_value(xml, XML_ATTR_ID);
+	if (p)
+		*id = crm_strdup(p);
+	crm_free(xpath);
+	free_xml(xml);
+	return rc;
+}
+
+/*
+ * ref. pacemaker/tools/crm_resource.c
+ */
+static gboolean
+set_rsc_role(const char *rid, const char *value)
+{
+	resource_t *rsc;
+	char *id = NULL;
+	xmlNode *top = NULL, *obj = NULL;
+	enum cib_errors rc;
+	const char *name = XML_RSC_ATTR_TARGET_ROLE;
+
+	dbg_printf(5, "%s\n", __FUNCTION__);
+
+	rsc = pe_find_resource(data_set.resources, rid);
+	if (!rsc)
+		return FALSE;
+
+	rc = find_meta_attr(rid, name, &id);
+	if (rc == cib_ok) {
+		dbg_printf(3, "%s: Found a match for name=%s: id=%s\n",
+			__FUNCTION__, name, id);
+	} else if (rc == cib_NOTEXISTS) {
+		char *set;
+		set = crm_concat(rid, XML_TAG_META_SETS, '-');
+		id = crm_concat(set, name, '-');
+		top = create_xml_node(NULL, crm_element_name(rsc->xml));
+		crm_xml_add(top, XML_ATTR_ID, rid);
+		obj = create_xml_node(top, XML_TAG_META_SETS);
+		crm_xml_add(obj, XML_ATTR_ID, set);
+		crm_free(set);
+	} else {
+		return FALSE;
+	}
+
+	obj = create_xml_node(obj, XML_CIB_TAG_NVPAIR);
+	if (!top)
+		top = obj;
+	crm_xml_add(obj, XML_ATTR_ID, id);
+	crm_xml_add(obj, XML_NVPAIR_ATTR_NAME, name);
+	crm_xml_add(obj, XML_NVPAIR_ATTR_VALUE, value);
+
+	dbg_printf(1, "%s: Update meta-attr %s=%s for %s\n",
+		__FUNCTION__, name, value, rid);
+	crm_log_xml_debug(top, "Update");
+
+	rc = (*cib)->cmds->modify(*cib, XML_CIB_TAG_RESOURCES, top, cib_sync_call);
+	if (rc != cib_ok) {
+		syslog(LOG_NOTICE,
+			"failed to modify to cib: %s\n", cib_error2string(rc));
+		printf("failed to modify to cib: %s\n", cib_error2string(rc));
+	}
+	free_xml(top);
+	crm_free(id);
+	return rc == cib_ok ? TRUE : FALSE;
+}
+
+static gboolean
+start_resource(const char *rid)
+{
+	gboolean updated_cib = FALSE;
+	int i = 0;
+
+	dbg_printf(5, "%s\n", __FUNCTION__);
+
+	if (!rid)
+		return FALSE;
+
+	printf("Starting domain %s(resource)\n", rid);
+
+check:
+	if (i >= READ_CIB_RETRY)
+		return FALSE;
+	switch (get_rsc_status(rid, NULL, NULL)) {
+	case RS_STARTED:
+		dbg_printf(2, "%s: Resource %s started\n", __FUNCTION__, rid);
+		return TRUE;
+	case RS_STOPPED:
+		if (updated_cib == FALSE) {
+			if (set_rsc_role(rid, RSC_ROLE_STARTED_S) == FALSE)
+				return FALSE;
+			updated_cib = TRUE;
+		} else {
+			i++;
+		}
+		dbg_printf(4, "%s: waiting..[%d]\n", __FUNCTION__, i);
+		sleep(1);
+		goto check;
+	default:
+		return FALSE;
+	}
+}
+
+static gboolean
+stop_resource(const char *rid)
+{
+	char *node = NULL, *uuid = NULL;
+	gboolean updated_cib = FALSE;
+	gboolean ret = FALSE;
+	int i = 0;
+
+	dbg_printf(5, "%s\n", __FUNCTION__);
+
+	if (!rid)
+		return FALSE;
+
+	printf("Destroying domain %s(resource)\n", rid);
+
+check:
+	if (i >= READ_CIB_RETRY)
+		goto rollback;
+	switch (get_rsc_status(rid, &node, &uuid)) {
+	case RS_STARTED:
+		if (updated_cib == FALSE) {
+			if (update_status_attr('U', rid, node, uuid, TRUE) == FALSE)
+				goto out;
+			if (set_rsc_role(rid, RSC_ROLE_STOPPED_S) == FALSE)
+				goto rollback;
+			updated_cib = TRUE;
+		} else {
+			i++;
+		}
+		dbg_printf(4, "%s: waiting..[%d]\n", __FUNCTION__, i);
+		sleep(1);
+		goto check;
+	case RS_STOPPED:
+		dbg_printf(2, "%s: Resource %s stopped\n", __FUNCTION__, rid);
+		if (updated_cib == FALSE)
+			ret = TRUE;
+		else
+			ret = update_status_attr('D', rid, node, uuid, TRUE);
+		goto out;
+	default:
+		goto out;
+	}
+rollback:
+	update_status_attr('D', rid, node, uuid, FALSE);
+out:
+	if (node) crm_free(node);
+	if (uuid) crm_free(uuid);
+	return ret;
+}
+
+static int
+char2level(const char *str)
+{
+	dbg_printf(5, "%s\n", __FUNCTION__);
+
+	if (!str)
+		return 0;
+	if (safe_str_eq(str, "emerg")) return LOG_EMERG;
+	else if (safe_str_eq(str, "alert")) return LOG_ALERT;
+	else if (safe_str_eq(str, "crit")) return LOG_CRIT;
+	else if (safe_str_eq(str, "err") ||
+		 safe_str_eq(str, "error")) return LOG_ERR;
+	else if (safe_str_eq(str, "warning") ||
+		 safe_str_eq(str, "warn")) return LOG_WARNING;
+	else if (safe_str_eq(str, "notice")) return LOG_NOTICE;
+	else if (safe_str_eq(str, "info")) return LOG_INFO;
+	else if (safe_str_eq(str, "debug")) return LOG_DEBUG;
+	else if (safe_str_eq(str, "debug2")) return LOG_DEBUG + 1;
+	else if (safe_str_eq(str, "debug3")) return LOG_DEBUG + 2;
+	else if (safe_str_eq(str, "debug4")) return LOG_DEBUG + 3;
+	else if (safe_str_eq(str, "debug5")) return LOG_DEBUG + 4;
+	else if (safe_str_eq(str, "debug6")) return LOG_DEBUG + 5;
+	return 0;
+}
+
+static void
+reset_lib_log(unsigned int level)
+{
+	dbg_printf(5, "%s\n", __FUNCTION__);
+
+	cl_log_set_entity(BACKEND_NAME);
+	set_crm_log_level(level);
+}
+
+
+static int
+pf_null(const char *rid, void *priv)
+{
+	dbg_printf(5, "%s: Resource %s\n", __FUNCTION__, rid);
+
+	printf("NULL operation: returning failure\n");
+	return 1;
+}
+
+
+static int
+pf_off(const char *rid, const char *src, uint32_t seqno, void *priv)
+{
+	struct pf_info *info = (struct pf_info *)priv;
+	int ret;
+
+	dbg_printf(5, "%s: Resource %s\n", __FUNCTION__, rid);
+
+	VALIDATE(info);
+	reset_lib_log(info->loglevel);
+	cib = &info->cib;
+
+	ret = stop_resource(rid) == TRUE ? 0 : 1;
+	free_dataset();
+	return ret;
+}
+
+
+static int
+pf_on(const char *rid, const char *src, uint32_t seqno, void *priv)
+{
+	struct pf_info *info = (struct pf_info *)priv;
+	int ret;
+
+	dbg_printf(5, "%s: Resource %s\n", __FUNCTION__, rid);
+
+	VALIDATE(info);
+	reset_lib_log(info->loglevel);
+	cib = &info->cib;
+
+	ret = start_resource(rid) == TRUE ? 0 : 1;
+	free_dataset();
+	return ret;
+}
+
+
+static int
+pf_devstatus(void *priv)
+{
+	dbg_printf(5, "%s\n", __FUNCTION__);
+
+	if (priv)
+		return 0;
+	return 1;
+}
+
+static int
+pf_status(const char *rid, void *priv)
+{
+	struct pf_info *info = (struct pf_info *)priv;
+	enum rsc_status rstat;
+
+	dbg_printf(5, "%s: Resource %s\n", __FUNCTION__, rid);
+
+	VALIDATE(info);
+	reset_lib_log(info->loglevel);
+	cib = &info->cib;
+
+	rstat = get_rsc_status(rid, NULL, NULL);
+	dbg_printf(3, "%s: get_rsc_status [%d]\n", __FUNCTION__, rstat);
+	free_dataset();
+
+	switch (rstat) {
+	case RS_STARTED:
+		return RESP_SUCCESS;
+	case RS_STOPPED:
+		return RESP_OFF;
+	case RS_UNDEFINED:
+	case RS_GETERROR:
+	default:
+		return RESP_FAIL;
+	}
+}
+
+
+static int
+pf_reboot(const char *rid, const char *src, uint32_t seqno, void *priv)
+{
+	struct pf_info *info = (struct pf_info *)priv;
+	int ret = 1;
+
+	dbg_printf(5, "%s: Resource %s\n", __FUNCTION__, rid);
+
+	VALIDATE(info);
+	reset_lib_log(info->loglevel);
+	cib = &info->cib;
+
+	if (stop_resource(rid) == TRUE)
+		ret = start_resource(rid) == TRUE ? 0 : ret;
+	free_dataset();
+	return ret;
+}
+
+
+/*
+ * Not implemented, because it is not called from the STONITH plug-in.
+ */
+static int
+pf_hostlist(hostlist_callback callback, void *arg, void *priv)
+{
+	struct pf_info *info = (struct pf_info *)priv;
+
+	dbg_printf(5, "%s\n", __FUNCTION__);
+
+	VALIDATE(info);
+	return 1;
+}
+
+
+static int
+pf_init(backend_context_t *c, config_object_t *conf)
+{
+	struct pf_info *info = NULL;
+	int level = 0;
+	char value[256];
+	char key[32];
+
+	dbg_printf(5, "%s\n", __FUNCTION__);
+
+#ifdef _MODULE
+	if (sc_get(conf, "fence_virtd/@debug", value, sizeof(value)) == 0)
+		dset(atoi(value));
+#endif
+	sprintf(key, "backends/%s/@pmlib_loglevel", BACKEND_NAME);
+	if (sc_get(conf, key, value, sizeof(value)) == 0) {
+		level = char2level(value);
+		crm_log_init(BACKEND_NAME, level, FALSE, FALSE, 0, NULL);
+		cl_log_enable_stdout(TRUE);
+	}
+
+	info = malloc(sizeof(*info));
+	if (!info)
+		return -1;
+
+	memset(info, 0, sizeof(*info));
+	info->magic = MAGIC;
+	info->loglevel = level;
+	*c = (void *)info;
+	return 0;
+}
+
+
+static int
+pf_shutdown(backend_context_t c)
+{
+	struct pf_info *info = (struct pf_info *)c;
+
+	dbg_printf(5, "%s\n", __FUNCTION__);
+
+	VALIDATE(info);
+	reset_lib_log(info->loglevel);
+	cib = &info->cib;
+
+	disconnect_cib();
+	free(info);
+	return 0;
+}
+
+
+static fence_callbacks_t pf_callbacks = {
+	.null = pf_null,
+	.off = pf_off,
+	.on = pf_on,
+	.reboot = pf_reboot,
+	.status = pf_status,
+	.devstatus = pf_devstatus,
+	.hostlist = pf_hostlist
+};
+
+static backend_plugin_t pf_plugin = {
+	.name = BACKEND_NAME,
+	.version = VERSION,
+	.callbacks = &pf_callbacks,
+	.init = pf_init,
+	.cleanup = pf_shutdown,
+};
+
+
+#ifdef _MODULE
+double
+BACKEND_VER_SYM(void)
+{
+	return PLUGIN_VERSION_BACKEND;
+}
+
+const backend_plugin_t *
+BACKEND_INFO_SYM(void)
+{
+	return &pf_plugin;
+}
+#else
+static void __attribute__((constructor))
+pf_register_plugin(void)
+{
+	plugin_reg_backend(&pf_plugin);
+}
+#endif
#!/bin/sh
#
# Support:      linux-ha lists linux-ha org
# License:      GNU General Public License (GPL)
#
#   Resource Agent for domains managed by the libvirt API.
#   Requires a running libvirt daemon (libvirtd).
#
#   (c) 2008-2010 Florian Haas, Dejan Muhamedagic,
#                 and Linux-HA contributors
#
#	usage: $0 {start|stop|status|monitor|migrate_to|migrate_from|meta-data|validate-all}
#
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs

# Defaults
OCF_RESKEY_force_stop_default=0
OCF_RESKEY_hypervisor_default="$(virsh --quiet uri)"

: ${OCF_RESKEY_force_stop=${OCF_RESKEY_force_stop_default}}
: ${OCF_RESKEY_hypervisor=${OCF_RESKEY_hypervisor_default}}
#######################################################################

## I'd very much suggest to make this RA use bash,
## and then use magic $SECONDS.
## But for now:
NOW=$(date +%s)

usage() {
  echo "usage: $0 {start|stop|status|monitor|migrate_to|migrate_from|meta-data|validate-all}"
}

# Emit the OCF resource-agent meta-data XML on stdout.
# The entire body is a single here-document: everything between the EOF
# markers is runtime output consumed by the cluster manager and must not
# be edited casually (parameter names here are part of the interface).
meta_data() {
	cat <<EOF
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="VirtualDomain">
<version>1.1</version>

<longdesc lang="en">
Resource agent for a virtual domain (a.k.a. domU, virtual machine,
virtual environment etc., depending on context) managed by libvirtd.
</longdesc>
<shortdesc lang="en">Manages virtual domains through the libvirt virtualization framework</shortdesc>

<parameters>

<parameter name="config" unique="1" required="1">
<longdesc lang="en">
Absolute path to the libvirt configuration file,
for this virtual domain.
</longdesc>
<shortdesc lang="en">Virtual domain configuration file</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="hypervisor" unique="0" required="0">
<longdesc lang="en">
Hypervisor URI to connect to. See the libvirt documentation for
details on supported URI formats. The default is system dependent.
</longdesc>
<shortdesc lang="en">Hypervisor URI</shortdesc>
<content type="string" default="${OCF_RESKEY_hypervisor_default}"/>
</parameter>

<parameter name="force_stop" unique="0" required="0">
<longdesc lang="en">
Always forcefully shut down ("destroy") the domain on stop. The default
behavior is to resort to a forceful shutdown only after a graceful
shutdown attempt has failed. You should only set this to true if
your virtual domain (or your virtualization backend) does not support
graceful shutdown.
</longdesc>
<shortdesc lang="en">Always force shutdown on stop</shortdesc>
<content type="boolean" default="${OCF_RESKEY_force_stop_default}" />
</parameter>

<parameter name="migration_transport" unique="0" required="0">
<longdesc lang="en">
Transport used to connect to the remote hypervisor while
migrating. Please refer to the libvirt documentation for details on
transports available. If this parameter is omitted, the resource will
use libvirt's default transport to connect to the remote hypervisor.
</longdesc>
<shortdesc lang="en">Remote hypervisor transport</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="migration_network_suffix" unique="0" required="0">
<longdesc lang="en">
Use a dedicated migration network. The migration URI is composed by
adding this parameters value to the end of the node name. If the node
name happens to be an FQDN (as opposed to an unqualified host name),
insert the suffix immediately prior to the first period (.) in the FQDN.
At the moment Qemu/KVM and Xen migration via a dedicated network is supported.

Note: Be sure this composed host name is locally resolveable and the
associated IP is reachable through the favored network.
</longdesc>
<shortdesc lang="en">Migration network host name suffix</shortdesc>
<content type="string" default="" />
</parameter>

<parameter name="monitor_scripts" unique="0" required="0">
<longdesc lang="en">
To additionally monitor services within the virtual domain, add this
parameter with a list of scripts to monitor.

Note: when monitor scripts are used, the start and migrate_from operations
will complete only when all monitor scripts have completed successfully.
Be sure to set the timeout of these operations to accommodate this delay.
</longdesc>
<shortdesc lang="en">space-separated list of monitor scripts</shortdesc>
<content type="string" default="" />
</parameter>

</parameters>

<actions>
<action name="start" timeout="90" />
<action name="stop" timeout="90" />
<action name="status" depth="0" timeout="30" interval="10" />
<action name="monitor" depth="0" timeout="30" interval="10" />
<action name="migrate_from" timeout="60" />
<action name="migrate_to" timeout="120" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="5" />
</actions>
</resource-agent>
EOF
}

# Set options to be passed to virsh:
# NOTE(review): assumes OCF_RESKEY_hypervisor was defaulted earlier in
# the script (not visible in this chunk) — confirm before relying on it.
VIRSH_OPTIONS="--connect=${OCF_RESKEY_hypervisor} --quiet"

# A state file where we record the domain name:
# one file per resource instance, under the cluster's volatile temp dir.
STATEFILE="${HA_RSCTMP}/VirtualDomain-${OCF_RESOURCE_INSTANCE}.state"

# Define the domain with libvirt from $OCF_RESKEY_config, extract the
# domain name from virsh's "Domain <name> defined from <file>" reply,
# and record it in $STATEFILE. Retries forever until a name is obtained;
# the CRM/LRM operation timeout bounds the overall wait.
VirtualDomain_Define() {
    local virsh_output
    local domain_name
    # Note: passing in the domain name from outside the script is
    # intended for testing and debugging purposes only. Don't do this
    # in production, instead let the script figure out the domain name
    # from the config file. You have been warned.
    if [ -z "$DOMAIN_NAME" ]; then
	# Spin until we have a domain name
	while true; do
	    virsh_output=`virsh ${VIRSH_OPTIONS} define ${OCF_RESKEY_config}`
	    # Use "sed -n ... p" so that output NOT matching the expected
	    # "Domain <name> defined from ..." line yields an empty string
	    # (and thus a retry). The previous "sed -e" passed unmatched
	    # output through verbatim, which could record arbitrary virsh
	    # output as a bogus domain name.
	    domain_name=`echo "$virsh_output" | sed -n -e 's/Domain \(.*\) defined from .*$/\1/p'`
            if [ -n "$domain_name" ]; then
		break;
            fi
	    ocf_log debug "Domain not defined yet, probably unable to connect to hypervisor. Retrying."
            sleep 1
	done
	echo "$domain_name" > $STATEFILE
	ocf_log info "Domain name \"$domain_name\" saved to $STATEFILE."
    else
	ocf_log warn "Domain name ${DOMAIN_NAME} already defined, overriding configuration file ${OCF_RESKEY_config}. You should do this for testing only."
    fi
}

# Remove the per-instance state file; only warn (never fail) if the
# removal itself fails, since cleanup is best-effort.
VirtualDomain_Cleanup_Statefile() {
    if ! rm -f $STATEFILE; then
	ocf_log warn "Failed to remove $STATEFILE during $__OCF_ACTION."
    fi
}

# Query the domain state via "virsh domstate" and map it to an OCF
# return code:
#   running/paused/idle/blocked -> $OCF_SUCCESS
#   "shut off"                  -> $OCF_NOT_RUNNING
#   anything unexpected         -> $OCF_ERR_GENERIC
# An empty or "no state" reply is retried (except during "stop", where
# we bail out after 3 tries so the forced destroy can proceed).
# NOTE(review): rc and status are deliberately not "local" here; callers
# such as VirtualDomain_Stop use the return status, not these variables.
VirtualDomain_Status() {
    local try=0
    rc=$OCF_ERR_GENERIC
    status="no state"
    while [ "$status" = "no state" ]; do
	try=$(($try + 1 ))
        status="`virsh $VIRSH_OPTIONS domstate $DOMAIN_NAME`"
        case "$status" in
	    "shut off")
	        # shut off: domain is defined, but not started
		ocf_log debug "Virtual domain $DOMAIN_NAME is currently $status."
	        rc=$OCF_NOT_RUNNING
	        ;;
	    running|paused|idle|blocked)
		# running: domain is currently actively consuming cycles
		# paused: domain is paused (suspended)
		# idle: domain is running but idle
		# blocked: synonym for idle used by legacy Xen versions
		ocf_log debug "Virtual domain $DOMAIN_NAME is currently $status."
	        rc=$OCF_SUCCESS
	        ;;
            ""|"no state")
		# Empty string may be returned when virsh does not
		# receive a reply from libvirtd.
		# "no state" may occur when the domain is currently
		# being migrated (on the migration target only), or
		# whenever virsh can't reliably obtain the domain
		# state.
		status="no state"
		if [ "$__OCF_ACTION" = "stop" ] && [ $try -ge 3 ]; then
		    # During the stop operation, we want to bail out
		    # quickly, so as to be able to force-stop (destroy)
		    # the domain if necessary.
		    ocf_log error "Virtual domain $DOMAIN_NAME has no state during stop operation, bailing out."
		    return $OCF_ERR_GENERIC;
		else
		    # During all other actions, we just wait and try
		    # again, relying on the CRM/LRM to time us out if
		    # this takes too long.
		    ocf_log info "Virtual domain $DOMAIN_NAME currently has no state, retrying."
		    sleep 1
		fi
		;;
            *)
		# any other output is unexpected.
		# Falls out of the loop with rc=$OCF_ERR_GENERIC since
		# status is no longer "no state".
                ocf_log error "Virtual domain $DOMAIN_NAME has unknown status \"$status\"!"
                ;;
        esac
    done
    return $rc
}

# Start the domain and wait until it (plus any configured monitor
# scripts) reports healthy. Idempotent: an already-running domain is
# reported as success immediately.
VirtualDomain_Start() {
    if VirtualDomain_Status; then
	ocf_log info "Virtual domain $DOMAIN_NAME already running."
	return $OCF_SUCCESS
    fi

    if ! virsh $VIRSH_OPTIONS start ${DOMAIN_NAME}; then
	ocf_log error "Failed to start virtual domain ${DOMAIN_NAME}."
	return $OCF_ERR_GENERIC
    fi

    # Block until monitoring succeeds; the CRM/LRM operation timeout
    # bounds how long we may spin here.
    until VirtualDomain_Monitor; do
	sleep 1
    done
    return $OCF_SUCCESS
}

# Stop the domain. Unless force_stop is set, first request a graceful
# shutdown and poll until the CIB operation timeout (minus 5s) expires,
# then fall back to a forced "virsh destroy".
# The vm-stonith plugin requests a forced stop by setting a transient
# node attribute "force_stop-<instance>"='true' in the CIB status
# section; we detect it with the cibadmin XPath query below.
VirtualDomain_Stop() {
    local i
    local status
    local shutdown_timeout
    local out ex

    VirtualDomain_Status
    status=$?

    # Check the forced shutdown (destroy) FLAG requested by the vm-stonith function.
    if ! ocf_is_true $OCF_RESKEY_force_stop; then
	local xpath="//cib/status/node_state[ uname='`hostname`']/transient_attributes/instance_attributes/nvpair[ name='force_stop-${OCF_RESOURCE_INSTANCE}'][ value='true']"
	$HA_SBIN_DIR/cibadmin -Q -A"$xpath" 1>/dev/null 2>&1
	if [ $? -eq 0 ]; then
	    # FLAG present for this node+instance: behave as if
	    # force_stop had been configured.
	    ocf_log info "set variable OCF_RESKEY_force_stop=1"
	    OCF_RESKEY_force_stop=1
	fi
    fi

    case $status in
	$OCF_SUCCESS)
	    if ! ocf_is_true $OCF_RESKEY_force_stop; then
		# Issue a graceful shutdown request
		ocf_log info "Issuing graceful shutdown request for domain ${DOMAIN_NAME}."
		virsh $VIRSH_OPTIONS shutdown ${DOMAIN_NAME}
	        # The "shutdown_timeout" we use here is the operation
		# timeout specified in the CIB, minus 5 seconds
		shutdown_timeout=$(( $NOW + ($OCF_RESKEY_CRM_meta_timeout/1000) -5 ))
		# Loop on status until we reach $shutdown_timeout
		while [ $NOW -lt $shutdown_timeout ]; do
		    VirtualDomain_Status
		    status=$?
		    case $status in
			$OCF_NOT_RUNNING)
			    # This was a graceful shutdown. Clean
			    # up and return.
			    VirtualDomain_Cleanup_Statefile
			    return $OCF_SUCCESS
			    ;;
			$OCF_SUCCESS)
			    # Domain is still running, keep
			    # waiting (until shutdown_timeout
			    # expires)
			    sleep 1
			    ;;
			*)
			    # Something went wrong. Bail out and
			    # resort to forced stop (destroy).
			    break;
		    esac
		    # Refresh NOW so the timeout comparison above sees
		    # the current wall-clock time.
		    NOW=$(date +%s)
		done
	    fi
	    ;;
	$OCF_NOT_RUNNING)
	    ocf_log info "Domain $DOMAIN_NAME already stopped."
	    return $OCF_SUCCESS
    esac
    # OK. Now if the above graceful shutdown hasn't worked, kill
    # off the domain with destroy. If that too does not work,
    # have the LRM time us out.
    ocf_log info "Issuing forced shutdown (destroy) request for domain ${DOMAIN_NAME}."
    out=$(virsh $VIRSH_OPTIONS destroy ${DOMAIN_NAME} 2>&1)
    ex=$?
    echo >&2 "$out"
    # unconditionally clean up.
    VirtualDomain_Cleanup_Statefile
    # Classify the destroy result by matching the concatenation of the
    # exit code and the captured output:
    #  - "domain is not running" error text: already stopped, success
    #  - any other nonzero exit code ([!0]*): hard failure
    #  - exit code 0: poll until Status confirms "not running"
    case $ex$out in
	*"error:"*"domain is not running"*)
	    : ;; # unexpected path to the intended outcome, all is well
	[!0]*)
	    return $OCF_ERR_GENERIC ;;
	0*)
	    while [ $status != $OCF_NOT_RUNNING ]; do
		VirtualDomain_Status
		status=$?
	    done ;;
    esac
    return $OCF_SUCCESS
}

# Live-migrate the domain to the node named by
# $OCF_RESKEY_CRM_meta_migrate_target. Builds the remote hypervisor URI
# (optionally with a "+transport" suffix and/or a dedicated migration
# network host name) and runs "virsh migrate --live".
# Fails if the domain is not active locally.
VirtualDomain_Migrate_To() {
    local target_node
    local remoteuri
    local transport_suffix
    local migrateuri
    local migrateport
    local migrate_target
    local hypervisor

    target_node="$OCF_RESKEY_CRM_meta_migrate_target"

    if VirtualDomain_Status; then
        # Find out the remote hypervisor to connect to. That is, turn
        # something like "qemu://foo:9999/system" into
        # "qemu+tcp://bar:9999/system"
	if [ -n "${OCF_RESKEY_migration_transport}" ]; then
	    transport_suffix="+${OCF_RESKEY_migration_transport}"
	fi
	# A typical migration URI via a special  migration network looks
	# like "tcp://bar-mig:49152". The port would be randomly chosen
	# by libvirt from the range 49152-49215 if omitted, at least since
	# version 0.7.4 ...
	if [ -n "${OCF_RESKEY_migration_network_suffix}" ]; then
	    # Scheme part of the hypervisor URI, e.g. "qemu" or "xen".
	    hypervisor="${OCF_RESKEY_hypervisor%%[+:]*}"
	    # Hostname might be a FQDN
	    # (insert the suffix before the first "." if so).
	    migrate_target=$(echo ${target_node} | sed -e "s,^\([^.]\+\),\1${OCF_RESKEY_migration_network_suffix},")
	    case $hypervisor in
		qemu)
		    # For quiet ancient libvirt versions a migration port is needed
		    # and the URI must not contain the "//". Newer versions can handle
		    # the "bad" URI.
		    # Pick a pseudo-random port in 49152..49215.
		    migrateport=$(( 49152 + $(ocf_maybe_random) % 64 ))
		    migrateuri="tcp:${migrate_target}:${migrateport}"
		    ;;
		xen)
		    migrateuri="xenmigr://${migrate_target}"
		    ;;
		*)
		    ocf_log warn "$DOMAIN_NAME: Migration via dedicated network currently not supported for ${hypervisor}."
		    ;;
	    esac
	fi
        # Scared of that sed expression? So am I. :-)
        # It rewrites "<scheme>://<host>[:port]/<path>" into
        # "<scheme><transport_suffix>://<target_node>[:port]/<path>".
	remoteuri=$(echo ${OCF_RESKEY_hypervisor} | sed -e "s,\(.*\)://[^/:]*\(:\?[0-9]*\)/\(.*\),\1${transport_suffix}://${target_node}\2/\3,")

	# OK, we know where to connect to. Now do the actual migration.
	ocf_log info "$DOMAIN_NAME: Starting live migration to ${target_node} (using remote hypervisor URI ${remoteuri} ${migrateuri})."
	virsh ${VIRSH_OPTIONS} migrate --live $DOMAIN_NAME ${remoteuri} ${migrateuri}
	rc=$?
	if [ $rc -ne 0 ]; then
	    ocf_log err "$DOMAIN_NAME: live migration to ${remoteuri} ${migrateuri} failed: $rc"
	    return $OCF_ERR_GENERIC
	else
	    ocf_log info "$DOMAIN_NAME: live migration to ${target_node} succeeded."
	    VirtualDomain_Cleanup_Statefile
	    return $OCF_SUCCESS
	fi
    else
	ocf_log err "$DOMAIN_NAME: migrate_to: Not active locally!"
	return $OCF_ERR_GENERIC
    fi
}

# On the migration target: wait until the incoming domain (and any
# monitor scripts) report healthy, then declare the migration done.
# The CRM/LRM operation timeout bounds the wait.
VirtualDomain_Migrate_From() {
    until VirtualDomain_Monitor; do
	sleep 1
    done
    ocf_log info "$DOMAIN_NAME: live migration from ${OCF_RESKEY_CRM_meta_migrate_source} succeeded."
    return $OCF_SUCCESS
}

# Health check: the generic domain state check is authoritative, and
# any configured monitor scripts are run afterwards. Returns the state
# check's code, or $OCF_ERR_GENERIC if a monitor script fails.
VirtualDomain_Monitor() {
    VirtualDomain_Status
    rc=$?
    # Anything other than "running" makes further checks pointless.
    if [ ${rc} -ne ${OCF_SUCCESS} ]; then
	return ${rc}
    fi
    # Domain is up: run each monitor script in turn, stopping at the
    # first failure.
    for script in ${OCF_RESKEY_monitor_scripts}; do
	script_output="$($script 2>&1)"
	script_rc=$?
	if [ ${script_rc} -eq ${OCF_SUCCESS} ]; then
	    ocf_log debug "Monitor command \"${script}\" for domain ${DOMAIN_NAME} completed successfully with output: ${script_output}"
	else
	    ocf_log warn "Monitor command \"${script}\" for domain ${DOMAIN_NAME} returned ${script_rc} with output: ${script_output}"
	    rc=$OCF_ERR_GENERIC
	    break
	fi
    done
    return ${rc}
}

# Validate the environment: required binaries present, "config"
# parameter set, and the config file readable (tolerated during probes,
# where it may live on not-yet-mounted shared storage).
VirtualDomain_Validate_All() {
    # Required binaries:
    for binary in virsh sed; do
        check_binary $binary
    done

    # Quote the operand: unquoted, an empty value degenerates to the
    # one-argument test "[ -z ]" (true only by accident) and a value
    # with whitespace is a test syntax error.
    if [ -z "$OCF_RESKEY_config" ]; then
	ocf_log error "Missing configuration parameter \"config\"."
	return $OCF_ERR_CONFIGURED
    fi

    # check if we can read the config file (otherwise we're unable to
    # deduce $DOMAIN_NAME from it, see below)
    if [ ! -r "$OCF_RESKEY_config" ]; then
	if ocf_is_probe; then
	    ocf_log info "Configuration file $OCF_RESKEY_config not readable during probe."
	else
	    ocf_log error "Configuration file $OCF_RESKEY_config does not exist or is not readable."
	    return $OCF_ERR_INSTALLED
	fi
    fi
    # Make success explicit rather than relying on the status of the
    # last command executed above.
    return $OCF_SUCCESS
}

# Exactly one action argument is required.
if [ $# -ne 1 ]; then
  usage
  exit $OCF_ERR_ARGS
fi

# meta-data and usage must work even without a valid configuration or a
# defined domain, so handle them before validation.
case $1 in
  meta-data)		meta_data
			exit $OCF_SUCCESS
			;;
  usage)		usage
			exit $OCF_SUCCESS
			;;
esac

# Everything except usage and meta-data must pass the validate test
VirtualDomain_Validate_All || exit $?

# Delete the forced shutdown (destroy) FLAG created by the vm-stonith function.
# (The flag is a transient CIB attribute; clear it on probe/start so a
# stale flag cannot force-destroy the domain on a later stop.)
if ocf_is_probe || [ "$__OCF_ACTION" = "start" ]; then
    xpath="//cib/status/node_state/transient_attributes/instance_attributes/nvpair[ name='force_stop-${OCF_RESOURCE_INSTANCE}'][ value='true']"
    $HA_SBIN_DIR/cibadmin -d -A"$xpath" --force 1>/dev/null 2>&1
fi

# During a probe, it is permissible for the config file to not be
# readable (it might be on shared storage not available during the
# probe). In that case, VirtualDomain_Define can't work and we're
# unable to get the domain name. Thus, we also can't check whether the
# domain is running. The only thing we can do here is to assume that
# it is not running.
# (Operands below are quoted: unquoted empty/whitespace values would
# otherwise break the tests.)
if [ ! -r "$OCF_RESKEY_config" ]; then
    ocf_is_probe && exit $OCF_NOT_RUNNING
    [ "$__OCF_ACTION" = "stop" ] && exit $OCF_SUCCESS
fi

# Define the domain on startup, and re-define whenever someone deleted
# the state file, or touched the config.
if [ ! -e "$STATEFILE" ] || [ "$OCF_RESKEY_config" -nt "$STATEFILE" ]; then
    VirtualDomain_Define
fi
# By now, we should definitely be able to read from the state file.
# If not, something went wrong.
if [ ! -r "$STATEFILE" ]; then
    ocf_log err "$STATEFILE not found or unreadable. This is unexpected. Cannot determine domain name."
    exit $OCF_ERR_GENERIC
fi
# Finally, retrieve the domain name from the state file.
DOMAIN_NAME=`cat "$STATEFILE" 2>/dev/null`
if [ -z "$DOMAIN_NAME" ]; then
    ocf_log err "$STATEFILE is empty. This is unexpected. Cannot determine domain name."
    exit $OCF_ERR_GENERIC
fi

# Dispatch the requested action. validate-all already ran above, so it
# is a no-op here; unknown actions are reported as unimplemented.
case $1 in
    start)		VirtualDomain_Start;;
    stop)		VirtualDomain_Stop;;
    migrate_to)		VirtualDomain_Migrate_To;;
    migrate_from)	VirtualDomain_Migrate_From;;
    status)		VirtualDomain_Status;;
    monitor)		VirtualDomain_Monitor;;
    validate-all)	;;
    *)			usage
			exit $OCF_ERR_UNIMPLEMENTED;;
esac
# Propagate the action's return code as the script's exit status.
exit $?
#!/bin/sh
#
# External STONITH module for fence-virt.
#
# Copyright (c) 2010 NIPPON TELEGRAPH AND TELEPHONE CORPORATION
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like.  Any license provided herein, whether implied or
# otherwise, applies only to this software file.  Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
# Default the hostlist field separator to ":" and reject a space, which
# would make the "<host><sep><rsc>" entries unparsable.
# (Note: "delimeter" spelling is the published parameter name.)
check_delimeter() {
	delimeter=${delimeter:-:}
	if [ "$delimeter" = " " ]; then
		ha_log.sh err "Invalid delimeter [$delimeter]."
		exit 6	#ERR_CONFIGURED
	fi
}

# Log the invocation (redirected, since ha_log.sh may be unusable when
# the script is run outside the cluster environment).
ha_log.sh debug "\$*: [$*]" >/dev/null 2>&1
case $1 in
gethosts)
	check_delimeter
	# First field of each hostlist entry is the guest's hostname.
	for h in $hostlist; do
		echo $h | awk -F "$delimeter" '{print $1}'
	done
	exit 0
	;;
on|off|reset|status)
	if [ "x$hostlist" = "x" ]; then
		ha_log.sh err "hostlist isn't set."
		exit 6	#ERR_CONFIGURED
	fi
	check_delimeter

	if [ "x$fencing_agent" = "x" ]; then
		ha_log.sh err "fencing_agent isn't set."
		exit 6	#ERR_CONFIGURED
	fi
	# Append a generous fencing timeout. (Direct assignment; the
	# previous `echo` round-trip was a no-op.)
	fencing_agent="$fencing_agent -t 3600"

	# Map stonithd's "reset" action onto fence-virt's "reboot" op;
	# hostnames are compared case-insensitively.
	op=`echo $1 | sed "s/reset/reboot/"`
	target=`echo $2 | tr A-Z a-z`

	for h in $hostlist; do
		host=`echo $h | awk -F "$delimeter" '{print $1}' | tr A-Z a-z`
		rsc=`echo $h | awk -F "$delimeter" '{print $2}'`

		# on/off/reboot act only on the requested target;
		# "status" checks every configured guest.
		if [ "$op" != "status" ] && [ "$target" != "$host" ]; then
			continue
		fi

		while true; do
			ha_log.sh info "Request: target=$host($rsc), op=$1($op)"
			ha_log.sh debug "$fencing_agent -o $op -H $rsc"
			$fencing_agent -o $op -H $rsc 2>/dev/null
			rc=$?
			ha_log.sh info "Result: $rc"
			# Agent exit codes 0 and 2 are treated as success;
			# 1 and 3 are hard failures; anything else is
			# retried after a short sleep.
			if [ $rc -eq 0 -o $rc -eq 2 ]; then
				if [ "$op" = "status" ]; then
					break
				else
					exit 0
				fi
			elif [ $rc -eq 1 -o $rc -eq 3 ]; then
				ha_log.sh err "request failed."
				exit 1
			else
				ha_log.sh info "request failed."
				sleep 3
				continue
			fi
		done
	done
	# status succeeded for all guests; for other ops reaching here
	# means the target never matched -> failure.
	if [ "$op" = "status" ]; then
		exit 0
	else
		exit 1
	fi
	;;
getconfignames)
	# Report every configuration parameter, including the required
	# fencing_agent (previously omitted from this list).
	echo "hostlist delimeter fencing_agent"
	exit 0
	;;
getinfo-devid)
	echo "vm-stonith STONITH device"
	exit 0
	;;
getinfo-devname)
	echo "vm-stonith STONITH external device"
	exit 0
	;;
getinfo-devdescr)
	echo "Allows STONITH to control guests managed by a CRM/Pacemaker host."
	echo "Requires VM + CRM/Pacemaker at both layers."
	exit 0
	;;
getinfo-devurl)
	echo "fence-virt -> http://sourceforge.net/projects/fence-virt/";
	exit 0
	;;
getinfo-xml)
	cat <<VMSTONITHXML
<parameters>
<parameter name="hostlist" unique="0" required="1">
<content type="string" />
<shortdesc lang="en">
Host Map
</shortdesc>
<longdesc lang="en">
A mapping of hostname and resource ID supported by this device.
For example: "guest-a1:rscid guest-a2:rscid"
 * rscid : resource ID of the virtual machine managed by the cluster of host.
</longdesc>
</parameter>
<parameter name="delimeter" unique="0" required="0">
<content type="string" />
<shortdesc lang="en">
Delimeter of hostname and resource ID
</shortdesc>
<longdesc lang="en">
The delimiter of the hostname and resource ID in hostlist parameter.
(The space character cannot be specified.)
</longdesc>
</parameter>
<parameter name="fencing_agent" unique="0" required="1">
<content type="string" />
<shortdesc lang="en">
Fencing agent command
</shortdesc>
<longdesc lang="en">
Specify the fencing agent command (fence_virt or fence_xvm) with parameters.
The following options need not be specified.
  -o : Fencing action.
  -H : Virtual machine to fence.
  -t : Fencing timeout.
For example: "fence_virt -D /dev/ttyS1"
</longdesc>
</parameter>
</parameters>
VMSTONITHXML
	exit 0
	;;
*)
	exit 1
	;;
esac

Attachment: overview.png
Description: PNG image

fence_virtd {
	listener = "serial";
	backend = "pm-fence";
	module_path = "/usr/lib64/fence-virt";
}

listeners {
	serial {
		mode = "serial";
		path = "/var/lib/libvirt/qemu";
	}
}

#backends {
#	pm-fence {
#		pmlib_loglevel = "debug";
#	}
#}

groups {
	group {
		uuid = "621d8c61-1070-7aab-6158-3889d68470ab";
		uuid = "4f0ed127-c8a3-4085-a0d0-a93231924202";
		uuid = "prmGuest-a1";
		uuid = "prmGuest-a2";
	}
}
property no-quorum-policy="freeze" \
	stonith-enabled="false" \
	startup-fencing="false"

rsc_defaults resource-stickiness="INFINITY" \
	migration-threshold="5"

primitive prmGuest-a1 ocf:extra:VirtualDomain \
	meta allow-migrate="true" \
	params config="/etc/libvirt/qemu/srv-a1.xml" hypervisor="qemu:///system" migration_transport="ssh" \
	op start timeout="120s" on-fail="restart" \
	op monitor interval="10s" timeout="30s" on-fail="restart" \
	op stop timeout="120s" on-fail="block" \
	op migrate_to interval="0s" timeout="120s" on-fail="block" \
	op migrate_from interval="0s" timeout="120s" on-fail="restart"

primitive prmGuest-a2 ocf:extra:VirtualDomain \
	meta allow-migrate="true" \
	params config="/etc/libvirt/qemu/srv-a2.xml" hypervisor="qemu:///system" migration_transport="ssh" \
	op start timeout="120s" on-fail="restart" \
	op monitor interval="10s" timeout="30s" on-fail="restart" \
	op stop timeout="120s" on-fail="block" \
	op migrate_to interval="0s" timeout="120s" on-fail="block" \
	op migrate_from interval="0s" timeout="120s" on-fail="restart"
[root@x3650g ~]# crm_mon -rfA1
============
Last updated: Tue Sep 20 14:02:15 2011
Stack: Heartbeat
Current DC: x3650g (32611d84-9cb5-410e-af1d-7b2c1ad2e443) - partition with quorum
Version: 1.0.11-1554a83db0d3c3e546cfd3aaff6af1184f79ee87
2 Nodes configured, unknown expected votes
2 Resources configured.
============

Online: [ x3650f x3650g ]

Full list of resources:

 prmGuest-a1    (ocf::extra:VirtualDomain):     Started x3650f
 prmGuest-a2    (ocf::extra:VirtualDomain):     Started x3650g

Node Attributes:
* Node x3650f:
* Node x3650g:

Migration summary:
* Node x3650g:
* Node x3650f:
[root@x3650g ~]#
property no-quorum-policy="ignore" \
	stonith-enabled="true" \
	startup-fencing="false" \
	stonith-timeout="90s" \
	stonith-action="reboot"

rsc_defaults resource-stickiness="INFINITY" \
	migration-threshold="1"

primitive prmDummy ocf:pacemaker:Dummy \
	op start timeout="90s" on-fail="restart" \
	op monitor interval="10s" timeout="20s" on-fail="restart" \
	op stop timeout="100s" on-fail="fence"

clone clnVmStonith \
	prmVmStonith
primitive prmVmStonith stonith:external/vm-stonith \
	params \
		priority="1" \
		hostlist="srv-a1:prmGuest-a1 srv-a2:prmGuest-a2" \
		fencing_agent="/usr/sbin/fence_virt -D/dev/ttyS1" \
	meta \
		migration-threshold="10" \
	op start timeout="60s" \
	op monitor interval="300s" timeout="60s" \
	op stop timeout="60s"
[root@srv-a2 ~]# crm_mon -rfA1
============
Last updated: Tue Sep 20 14:03:04 2011
Stack: Heartbeat
Current DC: srv-a2 (f1dfa428-cd91-487a-bd4c-d61ef3cd500b) - partition with quorum
Version: 1.0.11-1554a83db0d3c3e546cfd3aaff6af1184f79ee87
2 Nodes configured, unknown expected votes
2 Resources configured.
============

Online: [ srv-a2 srv-a1 ]

Full list of resources:

 prmDummy       (ocf::pacemaker:Dummy): Started srv-a2
 Clone Set: clnVmStonith
     Started: [ srv-a1 srv-a2 ]

Node Attributes:
* Node srv-a2:
* Node srv-a1:

Migration summary:
* Node srv-a2:
* Node srv-a1:
[root@srv-a2 ~]#

[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]