[libvirt] [PATCH 1/2] virCommandDoAsyncIO: Resolve race

Michal Privoznik mprivozn at redhat.com
Thu Feb 7 11:36:36 UTC 2013


With current implementation, virCommandWait unregister the stdio
callback and finish reading itself. However, the eventloop may
already have been in process of executing the callback in which
case one obtain these obscure error messages, like:

2013-02-06 11:41:43.766+0000: 19078: warning : virEventPollRemoveHandle:183 : Ignoring invalid remove watch -1
2013-02-06 11:41:43.766+0000: 19078: warning : virFileClose:65 : Tried to close invalid fd 25

The solution is to introduce a mutex and condition to virCommand,
and wait in virCommandWait for the eventloop to process all IOs.
This, however, requires all callees to unlock all mutexes held,
otherwise the eventloop may try to lock one of them and experience
deadlock. If that's the case, we will never be woken up to unlock
the problematic mutex.
---
 src/qemu/qemu_driver.c    | 58 +++++++++++++++++++++++++---
 src/qemu/qemu_migration.c | 15 +++++++-
 src/util/vircommand.c     | 98 ++++++++++++++++++++++++++++++++++++-----------
 src/util/virfile.c        |  4 ++
 4 files changed, 146 insertions(+), 29 deletions(-)

diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 979a027..e10fc89 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -2757,6 +2757,7 @@ qemuDomainSaveMemory(virQEMUDriverPtr driver,
     int fd = -1;
     int directFlag = 0;
     virFileWrapperFdPtr wrapperFd = NULL;
+    int wrapperFd_ret;
     unsigned int wrapperFlags = VIR_FILE_WRAPPER_NON_BLOCKING;
     unsigned long long pad;
     unsigned long long offset;
@@ -2833,7 +2834,15 @@ qemuDomainSaveMemory(virQEMUDriverPtr driver,
         goto cleanup;
     }
 
-    if (virFileWrapperFdClose(wrapperFd) < 0)
+    /* virCommandDoAsyncIO requires us to release all locks held */
+    virObjectRef(vm);
+    virObjectUnlock(vm);
+    qemuDriverUnlock(driver);
+    wrapperFd_ret = virFileWrapperFdClose(wrapperFd);
+    qemuDriverLock(driver);
+    virObjectLock(vm);
+    virObjectUnref(vm);
+    if (wrapperFd_ret < 0)
         goto cleanup;
 
     if ((fd = qemuOpenFile(driver, path, O_WRONLY, NULL, NULL)) < 0)
@@ -3240,6 +3249,7 @@ doCoreDump(virQEMUDriverPtr driver,
     int fd = -1;
     int ret = -1;
     virFileWrapperFdPtr wrapperFd = NULL;
+    int wrapperFd_ret;
     int directFlag = 0;
     unsigned int flags = VIR_FILE_WRAPPER_NON_BLOCKING;
 
@@ -3281,7 +3291,16 @@ doCoreDump(virQEMUDriverPtr driver,
                              path);
         goto cleanup;
     }
-    if (virFileWrapperFdClose(wrapperFd) < 0)
+
+    /* virCommandDoAsyncIO requires us to release all locks held */
+    virObjectRef(vm);
+    virObjectUnlock(vm);
+    qemuDriverUnlock(driver);
+    wrapperFd_ret = virFileWrapperFdClose(wrapperFd);
+    qemuDriverLock(driver);
+    virObjectLock(vm);
+    virObjectUnref(vm);
+    if (wrapperFd_ret < 0)
         goto cleanup;
 
     ret = 0;
@@ -4854,6 +4873,7 @@ qemuDomainSaveImageStartVM(virConnectPtr conn,
     virDomainEventPtr event;
     int intermediatefd = -1;
     virCommandPtr cmd = NULL;
+    int cmd_ret;
     char *errbuf = NULL;
     virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
 
@@ -4901,7 +4921,15 @@ qemuDomainSaveImageStartVM(virConnectPtr conn,
             VIR_FORCE_CLOSE(*fd);
         }
 
-        if (virCommandWait(cmd, NULL) < 0) {
+        /* virCommandDoAsyncIO requires us to release all locks held */
+        virObjectRef(vm);
+        virObjectUnlock(vm);
+        qemuDriverUnlock(driver);
+        cmd_ret = virCommandWait(cmd, NULL);
+        qemuDriverLock(driver);
+        virObjectLock(vm);
+        virObjectUnref(vm);
+        if (cmd_ret < 0) {
             qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED, 0);
             ret = -1;
         }
@@ -4976,6 +5004,7 @@ qemuDomainRestoreFlags(virConnectPtr conn,
     int ret = -1;
     virQEMUSaveHeader header;
     virFileWrapperFdPtr wrapperFd = NULL;
+    int wrapperFd_ret;
     int state = -1;
 
     virCheckFlags(VIR_DOMAIN_SAVE_BYPASS_CACHE |
@@ -5009,7 +5038,16 @@ qemuDomainRestoreFlags(virConnectPtr conn,
 
     ret = qemuDomainSaveImageStartVM(conn, driver, vm, &fd, &header, path,
                                      false);
-    if (virFileWrapperFdClose(wrapperFd) < 0)
+
+    /* virCommandDoAsyncIO requires us to release all locks held */
+    virObjectRef(vm);
+    virObjectUnlock(vm);
+    qemuDriverUnlock(driver);
+    wrapperFd_ret = virFileWrapperFdClose(wrapperFd);
+    qemuDriverLock(driver);
+    virObjectLock(vm);
+    virObjectUnref(vm);
+    if (wrapperFd_ret < 0)
         VIR_WARN("Failed to close %s", path);
 
     if (qemuDomainObjEndJob(driver, vm) == 0)
@@ -5153,6 +5191,7 @@ qemuDomainObjRestore(virConnectPtr conn,
     int ret = -1;
     virQEMUSaveHeader header;
     virFileWrapperFdPtr wrapperFd = NULL;
+    int wrapperFd_ret;
 
     fd = qemuDomainSaveImageOpen(driver, path, &def, &header,
                                  bypass_cache, &wrapperFd, NULL, -1, false,
@@ -5182,7 +5221,16 @@ qemuDomainObjRestore(virConnectPtr conn,
 
     ret = qemuDomainSaveImageStartVM(conn, driver, vm, &fd, &header, path,
                                      start_paused);
-    if (virFileWrapperFdClose(wrapperFd) < 0)
+
+    /* virCommandDoAsyncIO requires us to release all locks held */
+    virObjectRef(vm);
+    virObjectUnlock(vm);
+    qemuDriverUnlock(driver);
+    wrapperFd_ret = virFileWrapperFdClose(wrapperFd);
+    qemuDriverLock(driver);
+    virObjectLock(vm);
+    virObjectUnref(vm);
+    if (wrapperFd_ret < 0)
         VIR_WARN("Failed to close %s", path);
 
 cleanup:
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index a75fb4e..ddbfd18 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -3610,6 +3610,7 @@ qemuMigrationToFile(virQEMUDriverPtr driver, virDomainObjPtr vm,
     int rc;
     bool restoreLabel = false;
     virCommandPtr cmd = NULL;
+    int cmd_ret;
     int pipeFD[2] = { -1, -1 };
     unsigned long saveMigBandwidth = priv->migMaxBandwidth;
     char *errbuf = NULL;
@@ -3729,8 +3730,18 @@ qemuMigrationToFile(virQEMUDriverPtr driver, virDomainObjPtr vm,
     if (rc < 0)
         goto cleanup;
 
-    if (cmd && virCommandWait(cmd, NULL) < 0)
-        goto cleanup;
+    if (cmd) {
+        /* virCommandDoAsyncIO requires us to release all locks held */
+        virObjectRef(vm);
+        virObjectUnlock(vm);
+        qemuDriverUnlock(driver);
+        cmd_ret = virCommandWait(cmd, NULL);
+        qemuDriverLock(driver);
+        virObjectLock(vm);
+        virObjectUnref(vm);
+        if (cmd_ret < 0)
+            goto cleanup;
+    }
 
     ret = 0;
 
diff --git a/src/util/vircommand.c b/src/util/vircommand.c
index db7dbca..97ff135 100644
--- a/src/util/vircommand.c
+++ b/src/util/vircommand.c
@@ -42,6 +42,7 @@
 #include "virpidfile.h"
 #include "virprocess.h"
 #include "virbuffer.h"
+#include "virthread.h"
 
 #define VIR_FROM_THIS VIR_FROM_NONE
 
@@ -90,6 +91,9 @@ struct _virCommand {
     int outWatch;
     int errWatch;
 
+    virMutex lock;
+    virCond cond;
+
     bool handshake;
     int handshakeWait[2];
     int handshakeNotify[2];
@@ -788,9 +792,24 @@ virCommandNewArgs(const char *const*args)
     cmd->inWatch = cmd->outWatch = cmd->errWatch = -1;
     cmd->pid = -1;
 
+    if (virMutexInit(&cmd->lock) < 0) {
+        virReportSystemError(errno, "%s", _("Unable to init mutex"));
+        goto error;
+    }
+
+    if (virCondInit(&cmd->cond) < 0) {
+        virReportSystemError(errno, "%s", _("Unable to init cond"));
+        virMutexDestroy(&cmd->lock);
+        goto error;
+    }
+
     virCommandAddArgSet(cmd, args);
 
     return cmd;
+
+error:
+    VIR_FREE(cmd);
+    return NULL;
 }
 
 /**
@@ -2146,6 +2165,8 @@ virCommandHandleReadWrite(int watch, int fd, int events, void *opaque)
     bool eof = false;
     int *fdptr = NULL, **fdptrptr = NULL;
 
+    virMutexLock(&cmd->lock);
+
     VIR_DEBUG("watch=%d fd=%d events=%d", watch, fd, events);
     errno = 0;
 
@@ -2190,14 +2211,21 @@ virCommandHandleReadWrite(int watch, int fd, int events, void *opaque)
             bufptr = &cmd->outbuf;
             fdptr = &cmd->outfd;
             fdptrptr = &cmd->outfdptr;
-        } else {
+        } else if (watch == cmd->errWatch) {
             watchPtr = &cmd->errWatch;
             bufptr = &cmd->errbuf;
             fdptr = &cmd->errfd;
             fdptrptr = &cmd->errfdptr;
+        } else {
+            /* Suspicious wakeup ... */
+            virCondBroadcast(&cmd->cond);
+            virMutexUnlock(&cmd->lock);
+            return;
         }
 
-        if (events & VIR_EVENT_HANDLE_READABLE) {
+        if (events & (VIR_EVENT_HANDLE_READABLE |
+                      VIR_EVENT_HANDLE_HANGUP |
+                      VIR_EVENT_HANDLE_ERROR)) {
             if (**bufptr)
                 len = strlen(**bufptr);
 
@@ -2241,7 +2269,9 @@ virCommandHandleReadWrite(int watch, int fd, int events, void *opaque)
             *bufptr = NULL;
         if (fdptrptr)
             *fdptrptr = NULL;
+        virCondBroadcast(&cmd->cond);
     }
+    virMutexUnlock(&cmd->lock);
 }
 
 
@@ -2377,24 +2407,24 @@ virCommandRunAsync(virCommandPtr cmd, pid_t *pid)
         virReportError(VIR_ERR_INTERNAL_ERROR,
                        _("command is already running as pid %lld"),
                        (long long) cmd->pid);
-        return -1;
+        goto cleanup;
     }
 
     if (!synchronous && (cmd->flags & VIR_EXEC_DAEMON)) {
         virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                        _("daemonized command cannot use virCommandRunAsync"));
-        return -1;
+        goto cleanup;
     }
     if (cmd->pwd && (cmd->flags & VIR_EXEC_DAEMON)) {
         virReportError(VIR_ERR_INTERNAL_ERROR,
                        _("daemonized command cannot set working directory %s"),
                        cmd->pwd);
-        return -1;
+        goto cleanup;
     }
     if (cmd->pidfile && !(cmd->flags & VIR_EXEC_DAEMON)) {
         virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                        _("creation of pid file requires daemonized command"));
-        return -1;
+        goto cleanup;
     }
 
     str = virCommandToString(cmd);
@@ -2439,6 +2469,12 @@ virCommandRunAsync(virCommandPtr cmd, pid_t *pid)
         ret = virCommandRegisterEventLoop(cmd);
     }
 
+cleanup:
+    if (ret < 0) {
+        VIR_FORCE_CLOSE(infd[0]);
+        VIR_FORCE_CLOSE(infd[1]);
+        cmd->infd = -1;
+    }
     return ret;
 }
 
@@ -2453,13 +2489,16 @@ virCommandRunAsync(virCommandPtr cmd, pid_t *pid)
  * completion. Returns 0 if the command
  * finished with the exit status set.  If @exitstatus is NULL, then the
  * child must exit with status 0 for this to succeed.
+ *
+ * In case virCommandDoAsyncIO() has been requested previously on
+ * @cmd, this command may block. So you are required to unlock all
+ * locks held prior calling this function.
  */
 int
 virCommandWait(virCommandPtr cmd, int *exitstatus)
 {
     int ret;
     int status = 0;
-    const int events = VIR_EVENT_HANDLE_READABLE | VIR_EVENT_HANDLE_HANGUP;
 
     if (!cmd ||cmd->has_error == ENOMEM) {
         virReportOOMError();
@@ -2485,22 +2524,25 @@ virCommandWait(virCommandPtr cmd, int *exitstatus)
      * and repeat the exitstatus check code ourselves.  */
     ret = virProcessWait(cmd->pid, exitstatus ? exitstatus : &status);
 
-    if (cmd->inWatch != -1) {
-        virEventRemoveHandle(cmd->inWatch);
-        cmd->inWatch = -1;
-    }
-
-    if (cmd->outWatch != -1) {
-        virEventRemoveHandle(cmd->outWatch);
-        virCommandHandleReadWrite(cmd->outWatch, cmd->outfd, events, cmd);
-        cmd->outWatch = -1;
-    }
-
-    if (cmd->errWatch != -1) {
-        virEventRemoveHandle(cmd->errWatch);
-        virCommandHandleReadWrite(cmd->errWatch, cmd->errfd, events, cmd);
-        cmd->errWatch = -1;
+    /* Hopefully, all locks has been unlocked. Otherwise, the
+     * eventloop might be trying to lock one of them again, which
+     * will deadlock (obviously), and we will never be woken up here */
+    virMutexLock(&cmd->lock);
+    while (!(cmd->inWatch == -1 &&
+             cmd->outWatch == -1 &&
+             cmd->errWatch == -1)) {
+        VIR_DEBUG("Waiting on in=%d out=%d err=%d",
+                  cmd->inWatch, cmd->outWatch, cmd->errWatch);
+
+        if (virCondWait(&cmd->cond, &cmd->lock) < 0) {
+            virReportSystemError(errno, "%s",
+                                 _("Unable to wait on condition"));
+            break;
+        }
+        VIR_DEBUG("Done waiting on in=%d out=%d err=%d",
+                  cmd->inWatch, cmd->outWatch, cmd->errWatch);
     }
+    virMutexUnlock(&cmd->lock);
 
     if (ret == 0) {
         cmd->pid = -1;
@@ -2719,6 +2761,9 @@ virCommandFree(virCommandPtr cmd)
         VIR_FORCE_CLOSE(cmd->transfer[i]);
     }
 
+    virMutexDestroy(&cmd->lock);
+    ignore_value(virCondDestroy(&cmd->cond));
+
     VIR_FREE(cmd->inbuf);
     VIR_FORCE_CLOSE(cmd->outfd);
     VIR_FORCE_CLOSE(cmd->errfd);
@@ -2772,6 +2817,7 @@ virCommandFree(virCommandPtr cmd)
  *
  *      ...
  *
+ *      // release all locks held (qemu, virDomainObj, ... )
  *      if (virCommandWait(cmd, NULL) < 0)
  *          goto cleanup;
  *
@@ -2789,6 +2835,14 @@ virCommandFree(virCommandPtr cmd)
  * of data to be written to @cmd's stdin, don't pass any binary
  * data. If you want to re-run command, you need to call this and
  * buffer setting functions (virCommandSet.*Buffer) prior each run.
+ *
+ * Moreover, you are required to unlock all mutexes held prior calling
+ * virCommandWait. Due to current implementation, the virCommandWait
+ * waits for the event loop to finish all IOs on @cmd. However, if we
+ * are holding a locked mutex already and the event loop decides to
+ * issue a different callback which tries to lock the mutex it will
+ * deadlock and don't call any other callbacks. Not even the one which
+ * will unlock us.
  */
 void
 virCommandDoAsyncIO(virCommandPtr cmd)
diff --git a/src/util/virfile.c b/src/util/virfile.c
index b4765fb..bb33801 100644
--- a/src/util/virfile.c
+++ b/src/util/virfile.c
@@ -278,6 +278,10 @@ virFileWrapperFdNew(int *fd ATTRIBUTE_UNUSED,
  * callers can conditionally create a virFileWrapperFd wrapper but
  * unconditionally call the cleanup code.  To avoid deadlock, only
  * call this after closing the fd resulting from virFileWrapperFdNew().
+ *
+ * Since the virFileWrapperFdNew has requested asynchronous IO on
+ * underlying virCommand and this uses virCommandWait so we are
+ * required to unlock all locks held.
  */
 int
 virFileWrapperFdClose(virFileWrapperFdPtr wfd)
-- 
1.8.0.2




More information about the libvir-list mailing list