[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

[PATCH] Re: [libvirt] Silently ignored virDomainRestore failures



Charles Duffy wrote:
    For existing QEMU, it might be sufficient to just put an arbitrary
    'sleep(5)' before issuing 'cont', which would at least give it a
    reasonable chance of avoiding the race condition.

Well -- I wasn't going to submit the patch I'm now using internally (using and waiting for a sigil on stderr when migrate_url is "stdin"), but I suppose that with an else clause doing a sleep it might actually be the closest available option to a Right Thing for preexisting qemu.

I'll wait to hear back today from the contractors testing with it (they were hitting this race condition frequently) and post an amended copy here if it gets their thumbs-up.

Attached is what I'm presently using as a workaround until I have a proper ("info incoming") approach implemented. It is perhaps excessively paranoid (the extra .5sec of delay on the stdin case may not be needed), but we haven't been able to reproduce the issue with it applied.
>From ba5a3722fe699b7c4cf56b04dc90679089cd26a6 Mon Sep 17 00:00:00 2001
From: Charles Duffy <Charles_Duffy dell com>
Date: Wed, 30 Sep 2009 23:23:42 -0500
Subject: [PATCH] Wait for a sigil before considering a migration from stdin complete

---
 src/qemu_conf.c   |    2 +-
 src/qemu_driver.c |   68 +++++++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 62 insertions(+), 8 deletions(-)

diff --git a/src/qemu_conf.c b/src/qemu_conf.c
index 6b0b404..5a31c07 100644
--- a/src/qemu_conf.c
+++ b/src/qemu_conf.c
@@ -1401,7 +1401,7 @@ int qemudBuildCommandLine(virConnectPtr conn,
             }
         } else if (STREQ(migrateFrom, "stdio")) {
             if (qemuCmdFlags & QEMUD_CMD_FLAG_MIGRATE_QEMU_EXEC) {
-                migrateFrom = "exec:cat";
+                migrateFrom = "exec:cat && { echo 'MIGRATION' 'DONE'; } >&2";
             } else if (!(qemuCmdFlags & QEMUD_CMD_FLAG_MIGRATE_KVM_STDIO)) {
                 qemudReportError(conn, NULL, NULL, VIR_ERR_NO_SUPPORT,
                                  "%s", _("STDIO migration is not supported with this QEMU binary"));
diff --git a/src/qemu_driver.c b/src/qemu_driver.c
index 412b68d..67b959f 100644
--- a/src/qemu_driver.c
+++ b/src/qemu_driver.c
@@ -101,7 +101,8 @@ static int qemudStartVMDaemon(virConnectPtr conn,
                               struct qemud_driver *driver,
                               virDomainObjPtr vm,
                               const char *migrateFrom,
-                              int stdin_fd);
+                              int stdin_fd,
+                              int *pos_p);
 
 static void qemudShutdownVMDaemon(virConnectPtr conn,
                                   struct qemud_driver *driver,
@@ -128,6 +129,11 @@ static int qemudDomainSetMemoryBalloon(virConnectPtr conn,
 static int qemudDetectVcpuPIDs(virConnectPtr conn,
                                virDomainObjPtr vm);
 
+static int qemudWaitForMigrateDone(virConnectPtr conn,
+                                   struct qemud_driver* driver,
+                                   virDomainObjPtr vm,
+                                   off_t pos);
+
 static struct qemud_driver *qemu_driver = NULL;
 
 static int qemuCgroupControllerActive(struct qemud_driver *driver,
@@ -234,7 +240,7 @@ qemudAutostartConfigs(struct qemud_driver *driver) {
         virDomainObjLock(vm);
         if (vm->autostart &&
             !virDomainIsActive(vm)) {
-            int ret = qemudStartVMDaemon(conn, driver, vm, NULL, -1);
+            int ret = qemudStartVMDaemon(conn, driver, vm, NULL, -1, NULL);
             if (ret < 0) {
                 virErrorPtr err = virGetLastError();
                 VIR_ERROR(_("Failed to autostart VM '%s': %s\n"),
@@ -1840,7 +1846,8 @@ static int qemudStartVMDaemon(virConnectPtr conn,
                               struct qemud_driver *driver,
                               virDomainObjPtr vm,
                               const char *migrateFrom,
-                              int stdin_fd) {
+                              int stdin_fd,
+                              int *pos_p) {
     const char **argv = NULL, **tmp;
     const char **progenv = NULL;
     int i, ret;
@@ -1980,6 +1987,8 @@ static int qemudStartVMDaemon(virConnectPtr conn,
     if ((pos = lseek(logfile, 0, SEEK_END)) < 0)
         VIR_WARN(_("Unable to seek to end of logfile: %s\n"),
                  virStrerror(errno, ebuf, sizeof ebuf));
+    if (pos_p)
+        *pos_p = pos;
 
     for (i = 0 ; i < ntapfds ; i++)
         FD_SET(tapfds[i], &keepfd);
@@ -2803,7 +2812,7 @@ static virDomainPtr qemudDomainCreate(virConnectPtr conn, const char *xml,
 
     def = NULL;
 
-    if (qemudStartVMDaemon(conn, driver, vm, NULL, -1) < 0) {
+    if (qemudStartVMDaemon(conn, driver, vm, NULL, -1, NULL) < 0) {
         virDomainRemoveInactive(&driver->domains,
                                 vm);
         vm = NULL;
@@ -3615,6 +3624,48 @@ cleanup:
     return ret;
 }
 
+/* signature matches qemudHandlerMonitorOutput */
+static int
+qemudCheckMigrateOK(virConnectPtr conn ATTRIBUTE_UNUSED,
+                    virDomainObjPtr vm ATTRIBUTE_UNUSED,
+                    const char *output,
+                    int fd ATTRIBUTE_UNUSED)
+{
+    if (strstr(output, "MIGRATION DONE") == NULL)
+        return 1; /* keep reading */
+    return 0;
+}
+
+static int
+qemudWaitForMigrateDone(virConnectPtr conn,
+                                   struct qemud_driver* driver,
+                                   virDomainObjPtr vm,
+                                   off_t pos) {
+    char buf[1024];
+    int logfd;
+    int ret;
+
+    if ((logfd = qemudLogReadFD(conn, driver->logDir, vm->def->name, pos)) < 0)
+        return -1;
+    ret = qemudReadLogOutput(conn, vm, logfd, buf, sizeof(buf),
+                             qemudCheckMigrateOK,
+                             "console", 15);
+
+    if (close(logfd) < 0) {
+        char ebuf[4096];
+        VIR_WARN(_("Unable to close logfile: %s\n"),
+                 virStrerror(errno, ebuf, sizeof ebuf));
+    }
+
+    if (ret < 0) {
+        /* Unexpected end of file - inform user of QEMU log data */
+        qemudReportError(conn, NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+                         _("unable to start guest: %s"), buf);
+        return -1;
+    }
+
+    return 0;
+}
 
 static int qemudDomainSetVcpus(virDomainPtr dom, unsigned int nvcpus) {
     struct qemud_driver *driver = dom->conn->privateData;
@@ -3975,6 +4026,7 @@ static int qemudDomainRestore(virConnectPtr conn,
     virDomainObjPtr vm = NULL;
     int fd = -1;
     int ret = -1;
+    int pos = -1;
     char *xml = NULL;
     struct qemud_save_header header;
     virDomainEventPtr event = NULL;
@@ -4068,7 +4120,7 @@ static int qemudDomainRestore(virConnectPtr conn,
     def = NULL;
 
     /* Set the migration source and start it up. */
-    ret = qemudStartVMDaemon(conn, driver, vm, "stdio", fd);
+    ret = qemudStartVMDaemon(conn, driver, vm, "stdio", fd, &pos);
     close(fd);
     fd = -1;
     if (ret < 0) {
@@ -4080,6 +4132,8 @@ static int qemudDomainRestore(virConnectPtr conn,
         goto cleanup;
     }
 
+    qemudWaitForMigrateDone(conn, driver, vm, pos);
+
     event = virDomainEventNewFromObj(vm,
                                      VIR_DOMAIN_EVENT_STARTED,
                                      VIR_DOMAIN_EVENT_STARTED_RESTORED);
@@ -4363,7 +4417,7 @@ static int qemudDomainStart(virDomainPtr dom) {
         goto cleanup;
     }
 
-    ret = qemudStartVMDaemon(dom->conn, driver, vm, NULL, -1);
+    ret = qemudStartVMDaemon(dom->conn, driver, vm, NULL, -1, NULL);
     if (ret != -1)
         event = virDomainEventNewFromObj(vm,
                                          VIR_DOMAIN_EVENT_STARTED,
@@ -6400,7 +6454,7 @@ qemudDomainMigratePrepare2 (virConnectPtr dconn,
      * -incoming tcp:0.0.0.0:port
      */
     snprintf (migrateFrom, sizeof (migrateFrom), "tcp:0.0.0.0:%d", this_port);
-    if (qemudStartVMDaemon (dconn, driver, vm, migrateFrom, -1) < 0) {
+    if (qemudStartVMDaemon (dconn, driver, vm, migrateFrom, -1, NULL) < 0) {
         /* Note that we don't set an error here because qemudStartVMDaemon
          * should have already done that.
          */
-- 
1.6.4


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]