[Cluster-devel] cluster/fence/agents/xvm fence_xvmd.c README
lhh at sourceware.org
Fri Dec 1 22:14:41 UTC 2006
CVSROOT: /cvs/cluster
Module name: cluster
Changes by: lhh at sourceware.org 2006-12-01 22:14:40
Modified files:
fence/agents/xvm: fence_xvmd.c README
Log message:
Handle 0.1.9 case of libvirt returning a virDomainPtr + state for a VM that doesn't exist (vm state == VIR_DOMAIN_SHUTOFF)
Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/agents/xvm/fence_xvmd.c.diff?cvsroot=cluster&r1=1.5&r2=1.6
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/fence/agents/xvm/README.diff?cvsroot=cluster&r1=1.1&r2=1.2
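For context, the libvirt 0.1.9 behaviour mentioned in the log message can be sketched as follows. This is an illustrative, self-contained example, not the committed code (see the diff below for the actual change): looking up a domain that has already been destroyed may still return a virDomainPtr whose reported state is VIR_DOMAIN_SHUTOFF, so the state has to be checked instead of relying on the lookup failing.

/* Illustrative only: with libvirt 0.1.9, a lookup for a destroyed domain
 * may still return a virDomainPtr whose state is VIR_DOMAIN_SHUTOFF, so
 * the state must be checked before treating the domain as running. */
#include <stdio.h>
#include <string.h>
#include <libvirt/libvirt.h>

/* Returns 1 if the named domain should be considered gone. */
static int
domain_is_gone(virConnectPtr vp, const char *name)
{
	virDomainPtr vdp;
	virDomainInfo di;

	vdp = virDomainLookupByName(vp, name);
	if (!vdp)
		return 1;		/* lookup failed: domain is gone */

	memset(&di, 0, sizeof(di));
	virDomainGetInfo(vdp, &di);
	virDomainFree(vdp);

	/* 0.1.9 case: a pointer came back, but the domain is shut off */
	return (di.state == VIR_DOMAIN_SHUTOFF);
}

int
main(void)
{
	virConnectPtr vp = virConnectOpen(NULL);

	if (!vp)
		return 1;
	printf("gone: %d\n", domain_is_gone(vp, "domU-vm1"));
	virConnectClose(vp);
	return 0;
}

The committed wait_domain() below applies the same check inside a retry loop, so fencing only reports success once the domain has disappeared or reports VIR_DOMAIN_SHUTOFF.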
--- cluster/fence/agents/xvm/fence_xvmd.c 2006/11/13 16:13:50 1.5
+++ cluster/fence/agents/xvm/fence_xvmd.c 2006/12/01 22:14:40 1.6
@@ -207,6 +207,54 @@
}
+static inline int
+wait_domain(fence_req_t *req, virConnectPtr vp, int timeout)
+{
+	int tries = 0;
+	int response = 1;
+	virDomainPtr vdp;
+	virDomainInfo di;
+
+	if (!(vdp = get_domain(req, vp)))
+		return 0;
+
+	/* Check domain liveliness. If the domain is still here,
+	   we return failure, and the client must then retry */
+	/* XXX On the xen 3.0.4 API, we will be able to guarantee
+	   synchronous virDomainDestroy, so this check will not
+	   be necessary */
+	do {
+		sleep(1);
+		vdp = get_domain(req, vp);
+		if (!vdp) {
+			dprintf(2, "Domain no longer exists\n");
+			response = 0;
+			break;
+		}
+
+		memset(&di, 0, sizeof(di));
+		virDomainGetInfo(vdp, &di);
+		virDomainFree(vdp);
+
+		if (di.state == VIR_DOMAIN_SHUTOFF) {
+			dprintf(2, "Domain has been shut off\n");
+			response = 0;
+			break;
+		}
+
+		dprintf(4, "Domain still exists (state %d) after %d seconds\n",
+			di.state, tries);
+
+		if (++tries >= timeout)
+			break;
+	} while (1);
+
+	return response;
+}
+
+
+
+
int
do_fence_request_tcp(fence_req_t *req, fence_auth_type_t auth,
void *key, size_t key_len, virConnectPtr vp)
@@ -235,24 +283,18 @@
		break;
	case FENCE_OFF:
		printf("Destroying domain %s...\n", (char *)req->domain);
+
+		dprintf(2, "[OFF] Calling virDomainDestroy\n");
		ret = virDomainDestroy(vdp);
		if (ret < 0) {
-			/* raise_error(vp); */
+			printf("virDomainDestroy() failed: %d\n", ret);
			break;
-		} else {
-			sleep(1);
		}
-		/* Check domain liveliness. If the domain is still here,
-		   we return failure, and the client must then retry */
-		/* XXX On the xen 3.0.4 API, we will be able to guarantee
-		   synchronous virDomainDestroy, so this check will not
-		   be necessary */
-		vdp = get_domain(req, vp);
-		if (!vdp) {
-			response = 0; /* Success! */
-		} else {
-			virDomainFree(vdp);
+		response = wait_domain(req, vp, 15);
+
+		if (response) {
+			printf("Domain still exists; fencing failed\n");
		}
		break;
	case FENCE_REBOOT:
@@ -271,42 +313,26 @@
"libvirt\n");
}
- dprintf(2, "Calling virDomainDestroy\n");
+ dprintf(2, "[REBOOT] Calling virDomainDestroy\n");
ret = virDomainDestroy(vdp);
if (ret < 0) {
printf("virDomainDestroy() failed: %d\n", ret);
if (domain_desc)
free(domain_desc);
break;
- } else {
- /* Give it time for the operation to complete */
- sleep(3);
}
- /* Check domain liveliness. If the domain is still here,
- we return failure, and the client must then retry */
- /* XXX On the xen 3.0.4 API, we will be able to guarantee
- synchronous virDomainDestroy, so this check will not
- be necessary */
- vdp = get_domain(req, vp);
- if (!vdp) {
- dprintf(2, "Domain no longer exists\n");
- response = 0; /* Success! */
- } else {
- printf("Domain still exists; fencing failed\n");
- virDomainFree(vdp);
- ret = 1; /* Failed to kill it */
- }
+ response = wait_domain(req, vp, 15);
- /* Recreate the domain if possible */
- if (ret == 0 && domain_desc) {
+ if (response) {
+ printf("Domain still exists; fencing failed\n");
+ } else if (domain_desc) {
+ /* Recreate the domain if possible */
/* Success */
dprintf(2, "Calling virDomainCreateLinux()...\n");
virDomainCreateLinux(vp, domain_desc, 0);
- }
-
- if (domain_desc)
free(domain_desc);
+ }
break;
}
@@ -646,17 +672,14 @@
	int mc_sock;
	char key[4096];
	int key_len = 0;
-	char *my_options = "dfi:a:p:C:c:k:u?hVX";
+	char *my_options = "dfi:a:p:C:c:k:u?hV";
	void *h;
	args_init(&args);
	args_get_getopt(argc, argv, my_options, &args);
-	if (!(args.flags & F_NOCCS)) {
-		args_get_ccs(my_options, &args);
-	}
	args_finalize(&args);
	if (args.debug > 0) {
-		dset(args.debug);
+		_debug = args.debug;
		args_print(&args);
	}
--- cluster/fence/agents/xvm/README 2006/10/05 16:11:36 1.1
+++ cluster/fence/agents/xvm/README 2006/12/01 22:14:40 1.2
@@ -1,4 +1,4 @@
-I. Fence_xvm - the Xen virtual machine fencing agent
+I. Fence_xvm - virtual machine fencing agent
Fence_xvm is an agent which establishes a communications link between
a cluster of virtual machines (VC) and a cluster of domain0/physical
@@ -20,11 +20,11 @@
cluster!).
-II. Fence_xvmd - The Xen virtual machine fencing host
+II. Fence_xvmd - The virtual machine fencing host
Fence_xvmd is a daemon which runs on physical hosts (e.g. in domain0)
-of the cluster hosting the Xen virtual cluster. It listens on a port
-for multicast traffic from Xen virtual cluster(s), and takes actions.
+of the cluster hosting the virtual cluster. It listens on a port
+for multicast traffic from virtual cluster(s), and takes actions.
Multiple disjoint virtual clusters can coexist on a single physical
host cluster, but this requires multiple instances of fence_xvmd.
@@ -41,11 +41,11 @@
last- known host is down, we must store the last-known locations of
each virtual machine in some sort of cluster-wide way. For this, we
use the AIS Checkpointing API, which is provided by OpenAIS. Every
-few seconds, fence_xvmd queries the Xen Hypervisor via libvirt and
+few seconds, fence_xvmd queries the hypervisor via libvirt and
stores any local VM states in a checkpoint. In the event of a
physical node failure (which consequently causes the failure of one
-or more Xen guests), we can then read the checkpoint section
-corresponding to the guest we need to fence to find out the previous
+or more guests), we can then read the checkpoint section corresponding
+to the guest we need to fence to find out the previous
owner. With that information, we can then check with CMAN to see if
the last-known host node has been fenced. If so, then the VM is
clean as well. The physical cluster must, therefore, have fencing
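(Aside, not part of the patch: the periodic libvirt query described above can be sketched roughly as below. This is an illustrative example only, not the fence_xvmd source; writing the results into an AIS checkpoint is omitted.)

/* Illustrative sketch: list running domains via libvirt and report each
 * name and state, as the polling step above describes.  The real daemon
 * additionally records these states in an AIS checkpoint. */
#include <stdio.h>
#include <libvirt/libvirt.h>

static void
poll_local_domains(virConnectPtr vp)
{
	int ids[128];
	int i, n;

	n = virConnectListDomains(vp, ids, 128);
	for (i = 0; i < n; i++) {
		virDomainPtr vdp = virDomainLookupByID(vp, ids[i]);
		virDomainInfo di;

		if (!vdp)
			continue;
		if (virDomainGetInfo(vdp, &di) == 0)
			printf("%s: state %d\n",
			       virDomainGetName(vdp), di.state);
		virDomainFree(vdp);
	}
}

int
main(void)
{
	virConnectPtr vp = virConnectOpen(NULL);

	if (!vp)
		return 1;
	poll_local_domains(vp);
	virConnectClose(vp);
	return 0;
}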
@@ -71,8 +71,10 @@
(e) Open connection to host contained within multicast
packet.
(f) Check with CMAN to see if last-known host has been fenced.
- (g) If last-known host has been fenced, send success response.
- (h) Authenticate server & send response.
+ (If it has not, do nothing -- this is why the physical
+ cluster also needs fencing!)
+ (g) Authenticate server & send response.
+ (h) If last-known host has been fenced, send success response.
NOTE: There is always a possibility that a VM is started again
before the fencing operation and checkpoint update for that VM
@@ -111,14 +113,70 @@
dd if=/dev/urandom of=/etc/cluster/fence_xvm.key bs=4096 count=1
-Distribute the generated key file to all domUs in a cluster as well
-as all dom0s which will be hosting that particular cluster of domUs.
-The key should not be placed on shared file systems (because shared
-file systems require the cluster, which requires fencing...).
-
-Start fence_xvmd on all dom0s
+Distribute the generated key file to all virtual machines in a
+cluster as well as all physical host nodes which will be hosting
+that particular cluster of guests. More simply, everything involved
+with hosting the virtual cluster as well as the virtual cluster
+itself must have the same key file; it acts as a password.
-Configure fence_xvm on the domU cluster...
-
-rest...tbd
+The key should not be placed on shared file systems (because shared
+file systems require the cluster, which requires fencing...).
+Furthermore, it is considered 'unsupported' to join a host cluster
+and a guest cluster in one management domain.
+
+A. Configuring the host (physical) cluster
+
+On the host cluster, you need to add the following tag as a
+child of the <cluster> tag in /etc/cluster/cluster.conf:
+
+ <fence_xvmd/>
+
+(Do not forget to increment the configuration version number and
+run 'ccs_tool update /etc/cluster/cluster.conf' !).
+
+Start fence_xvmd on all host nodes if it isn't already running.
+Just run 'fence_xvmd'. The next time the cluster is restarted,
+fence_xvmd will start automatically; it is started by the cman
+script if you have the above tag in cluster.conf.
+
+B. Configuring the guest (virtual) cluster
+
+On the guest cluster, you need to set up per-node fencing. This
+is a fairly simple task as well. First, you need to add a fence
+device for 'xvm'. Simply add the following to the <fencedevices/>
+tag in the guest cluster's cluster.conf:
+
+ <fencedevice name="xvm" agent="fence_xvm"/>
+
+After doing this, each node also needs individual fencing set up.
+For each <clusternode/> tag, you will need to add something like
+the following:
+
+	<fence>
+		<method name="1">
+			<device name="xvm" domain="domain-name"/>
+		</method>
+	</fence>
+
+For example, if you have a virtual host named 'vm1.test.com' with a
+corresponding virtual domain name of 'domU-vm1' in the dom0 cluster,
+and a node ID of 1, the <clusternode> tag for that virtual machine
+would look like so:
+
+	<clusternode name="vm1.test.com" nodeid="1" votes="1">
+		<fence>
+			<method name="1">
+				<device name="xvm" domain="domU-vm1"/>
+			</method>
+		</fence>
+	</clusternode>
+
+C. Advanced configuration
+
+Any advanced configuration parameters (e.g. changing authentication,
+hashing, key file, etc.) should be included in the <fence_xvmd/> tag
+in the host cluster and the <fencedevice .../> tag in the guest
+cluster. For a complete list of advanced parameters, see:
+ fence_xvmd -h
+ fence_xvm -h