[libvirt] [PATCH] Fix cleanup of now dead VMs on libvirtd restart

Daniel P. Berrange berrange at redhat.com
Fri Jul 15 14:37:17 UTC 2011


From: "Daniel P. Berrange" <berrange at redhat.com>

When libvirtd restarts it will attempt to reconnect to existing
LXC containers. If it loads an XML state file for the container,
the container will appear running. If we fail to read the PID
file, or fail to connect to the LXC monitor, we should be killing
off the guest, but if the VM's cgroup does not exist any more,
cleanup will get skipped. Reading the PID file is also pointless
since the PID is in the XML state file.

In lxcReconnectVM we do not need to read the PID file. If part
of the reconnect process fails we need to run the VM terminate
code as a safety net.

In lxcVmTerminate, if we can't obtain the VM cgroup, we know
the process has died, but we must still run lxcVmCleanup to
clear out the virDomainObjPtr live state.

* src/lxc/lxc_driver.c: Fix cleanup of dead VMs on restart
---
 src/lxc/lxc_driver.c |   60 +++++++++++++++++++++++++------------------------
 1 files changed, 31 insertions(+), 29 deletions(-)

diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c
index c8d78d5..03360e5 100644
--- a/src/lxc/lxc_driver.c
+++ b/src/lxc/lxc_driver.c
@@ -1340,22 +1340,26 @@ static int lxcVmTerminate(lxc_driver_t *driver,
         return -1;
     }
 
-    if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) != 0)
-        return -1;
-
-    rc = virCgroupKillPainfully(group);
-    if (rc < 0) {
-        virReportSystemError(-rc, "%s",
-                             _("Failed to kill container PIDs"));
-        rc = -1;
-        goto cleanup;
-    }
-    if (rc == 1) {
-        lxcError(VIR_ERR_INTERNAL_ERROR, "%s",
-                 _("Some container PIDs refused to die"));
-        rc = -1;
-        goto cleanup;
+    if (virCgroupForDomain(driver->cgroup, vm->def->name, &group, 0) == 0) {
+        rc = virCgroupKillPainfully(group);
+        if (rc < 0) {
+            virReportSystemError(-rc, "%s",
+                                 _("Failed to kill container PIDs"));
+            rc = -1;
+            goto cleanup;
+        }
+        if (rc == 1) {
+            lxcError(VIR_ERR_INTERNAL_ERROR, "%s",
+                     _("Some container PIDs refused to die"));
+            rc = -1;
+            goto cleanup;
+        }
+    } else {
+        /* If cgroup doesn't exist, the VM pids must have already
+         * died and so we're just cleaning up stale state
+         */
     }
+
     lxcVmCleanup(driver, vm, reason);
 
     rc = 0;
@@ -2172,32 +2176,24 @@ lxcReconnectVM(void *payload, const void *name ATTRIBUTE_UNUSED, void *opaque)
     lxcDomainObjPrivatePtr priv;
 
     virDomainObjLock(vm);
+    VIR_DEBUG("Reconnect %d %d %d\n", vm->def->id, vm->pid, vm->state.state);
 
     priv = vm->privateData;
-    if ((priv->monitor = lxcMonitorClient(driver, vm)) < 0) {
-        goto cleanup;
-    }
-
-    /* Read pid from controller */
-    if ((virFileReadPid(lxc_driver->stateDir, vm->def->name, &vm->pid)) != 0) {
-        VIR_FORCE_CLOSE(priv->monitor);
-        goto cleanup;
-    }
 
     if (vm->pid != 0) {
         vm->def->id = vm->pid;
         virDomainObjSetState(vm, VIR_DOMAIN_RUNNING,
                              VIR_DOMAIN_RUNNING_UNKNOWN);
 
+        if ((priv->monitor = lxcMonitorClient(driver, vm)) < 0)
+            goto error;
+
         if ((priv->monitorWatch = virEventAddHandle(
                  priv->monitor,
                  VIR_EVENT_HANDLE_ERROR | VIR_EVENT_HANDLE_HANGUP,
                  lxcMonitorEvent,
-                 vm, NULL)) < 0) {
-            lxcVmTerminate(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED);
-            virDomainAuditStop(vm, "failed");
-            goto cleanup;
-        }
+                 vm, NULL)) < 0)
+            goto error;
     } else {
         vm->def->id = -1;
         VIR_FORCE_CLOSE(priv->monitor);
@@ -2205,6 +2201,12 @@ lxcReconnectVM(void *payload, const void *name ATTRIBUTE_UNUSED, void *opaque)
 
 cleanup:
     virDomainObjUnlock(vm);
+    return;
+
+error:
+    lxcVmTerminate(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED);
+    virDomainAuditStop(vm, "failed");
+    goto cleanup;
 }
 
 
-- 
1.7.6




More information about the libvir-list mailing list