[Libvir] PATCH: Process QEMU monitor at startup

Daniel P. Berrange berrange at redhat.com
Fri Mar 2 14:35:31 UTC 2007


With the current QEMU driver we simply spawn the QEMU process and return to
the caller immediately. If the QEMU process dies immediately for some reason
the user will never no - their domain will simply never appear. To address
this, the attached patch will spawn the QEMU process and then immeditely
process stderr to look for the monitor PTY - or EOF indicating failure. If
it finds the monitor it will also try to connect to it - if this fails in 
any way then the domain is terminated with extreme prejudice - we absolutely
need the monitor connection for other operations to work. This should at
least cause a 'Domain startup failed' message to be returned to the user
although more work is needed to try and extract a useful error message
from the stuff written to stderr.

Also in this patch I have reversed the order in which file descriptors are
added to the poll array. Currently they are added server, then client, 
then VMs. The trouble with this is that when processing the server event, 
if a new client is added to the daemon then processing of subsequent FDs
is out of sync. Likewise if  a client message causes a new VM to startup
then processing of subsquent VM FDs is out of sync. Reversing the order
trivially solves it, because VM FDs s are now processed before client
FDs, and thus any new VMs started by a client don't mess things up. Likewise
client FDs are processed before server FDs. A more robust long term fix 
would be to carry around some extra metadata instead of relying on ordering
when dispatching events.

Finally this patch also fixes the 'resume' operation, and allows for the
VM state to be reported correctly - ie we can now report 'paused' as well
as shutdown/running.

Dan.
-- 
|=- Red Hat, Engineering, Emerging Technologies, Boston.  +1 978 392 2496 -=|
|=-           Perl modules: http://search.cpan.org/~danberr/              -=|
|=-               Projects: http://freshmeat.net/~danielpb/               -=|
|=-  GnuPG: 7D3B9505   F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505  -=| 
-------------- next part --------------
Index: conf.c
===================================================================
RCS file: /data/cvs/libvirt/qemud/conf.c,v
retrieving revision 1.38
diff -u -p -r1.38 conf.c
--- conf.c	26 Feb 2007 15:34:24 -0000	1.38
+++ conf.c	2 Mar 2007 14:15:30 -0000
@@ -1308,6 +1308,7 @@ qemudAssignVMDef(struct qemud_server *se
     vm->monitor = -1;
     vm->pid = -1;
     vm->id = -1;
+    vm->state = QEMUD_STATE_STOPPED;
     vm->def = def;
     vm->next = server->vms;
 
Index: driver.c
===================================================================
RCS file: /data/cvs/libvirt/qemud/driver.c,v
retrieving revision 1.17
diff -u -p -r1.17 driver.c
--- driver.c	26 Feb 2007 14:21:21 -0000	1.17
+++ driver.c	2 Mar 2007 14:15:31 -0000
@@ -60,6 +60,7 @@ int qemudMonitorCommand(struct qemud_ser
                         char **reply) {
     int size = 0;
     char *buf = NULL;
+
     if (write(vm->monitor, cmd, strlen(cmd)) < 0) {
         return -1;
     }
@@ -74,13 +75,20 @@ int qemudMonitorCommand(struct qemud_ser
         for (;;) {
             char data[1024];
             int got = read(vm->monitor, data, sizeof(data));
+
+            if (got == 0) {
+                if (buf)
+                    free(buf);
+                return -1;
+            }
             if (got < 0) {
                 if (errno == EINTR)
                     continue;
                 if (errno == EAGAIN)
                     break;
 
-                free(buf);
+                if (buf)
+                    free(buf);
                 return -1;
             }
             if (!(buf = realloc(buf, size+got+1)))
@@ -92,7 +100,7 @@ int qemudMonitorCommand(struct qemud_ser
         if (buf)
             qemudDebug("Mon [%s]", buf);
         /* Look for QEMU prompt to indicate completion */
-        if (buf && ((tmp = strstr(buf, "\n(qemu)")) != NULL)) {
+        if (buf && ((tmp = strstr(buf, "\n(qemu) ")) != NULL)) {
             tmp[0] = '\0';
             break;
         }
@@ -314,11 +322,14 @@ int qemudDomainSuspend(struct qemud_serv
         qemudReportError(server, VIR_ERR_OPERATION_FAILED, "domain is not running");
         return -1;
     }
+    if (vm->state == QEMUD_STATE_PAUSED)
+        return 0;
 
     if (qemudMonitorCommand(server, vm, "stop\n", &info) < 0) {
         qemudReportError(server, VIR_ERR_OPERATION_FAILED, "suspend operation failed");
         return -1;
     }
+    vm->state = QEMUD_STATE_PAUSED;
     qemudDebug("Reply %s", info);
     free(info);
     return 0;
@@ -336,13 +347,16 @@ int qemudDomainResume(struct qemud_serve
         qemudReportError(server, VIR_ERR_OPERATION_FAILED, "domain is not running");
         return -1;
     }
+    if (vm->state == QEMUD_STATE_RUNNING)
+        return 0;
     if (qemudMonitorCommand(server, vm, "cont\n", &info) < 0) {
         qemudReportError(server, VIR_ERR_OPERATION_FAILED, "resume operation failed");
         return -1;
     }
+    vm->state = QEMUD_STATE_RUNNING;
     qemudDebug("Reply %s", info);
     free(info);
-    return -1;
+    return 0;
 }
 
 
@@ -371,12 +385,7 @@ int qemudDomainGetInfo(struct qemud_serv
         return -1;
     }
 
-    if (!qemudIsActiveVM(vm)) {
-        *runstate = QEMUD_STATE_STOPPED;
-    } else {
-        /* XXX in future need to add PAUSED */
-        *runstate = QEMUD_STATE_RUNNING;
-    }
+    *runstate = vm->state;
 
     if (!qemudIsActiveVM(vm)) {
         *cputime = 0;
@@ -432,7 +441,7 @@ int qemudDomainDumpXML(struct qemud_serv
     strncpy(xml, vmxml, xmllen);
     xml[xmllen-1] = '\0';
 
-    free(xml);
+    free(vmxml);
 
     return 0;
 }
Index: internal.h
===================================================================
RCS file: /data/cvs/libvirt/qemud/internal.h,v
retrieving revision 1.17
diff -u -p -r1.17 internal.h
--- internal.h	23 Feb 2007 17:15:18 -0000	1.17
+++ internal.h	2 Mar 2007 14:15:31 -0000
@@ -216,6 +216,7 @@ struct qemud_vm {
     int monitor;
     int pid;
     int id;
+    int state;
 
     int *tapfds;
     int ntapfds;
Index: qemud.c
===================================================================
RCS file: /data/cvs/libvirt/qemud/qemud.c,v
retrieving revision 1.29
diff -u -p -r1.29 qemud.c
--- qemud.c	23 Feb 2007 17:15:18 -0000	1.29
+++ qemud.c	2 Mar 2007 14:15:31 -0000
@@ -615,6 +615,134 @@ qemudExec(struct qemud_server *server, c
 }
 
 
+static int qemudOpenMonitor(struct qemud_vm *vm, const char *monitor) {
+    int monfd;
+    char buffer[1024];
+    int got = 0;
+
+    if (!(monfd = open(monitor, O_RDWR))) {
+        return -1;
+    }
+    if (qemudSetCloseExec(monfd) < 0)
+        goto error;
+    if (qemudSetNonBlock(monfd) < 0)
+        goto error;
+
+    /* Consume & discard the initial greeting */
+    for(;;) {
+        int ret;
+
+        ret = read(monfd, buffer+got, sizeof(buffer)-got-1);
+        if (ret == 0)
+            goto error;
+        if (ret < 0) {
+            struct pollfd fd = { .fd = monfd, .events = POLLIN };
+            if (errno != EAGAIN &&
+                errno != EINTR)
+                goto error;
+
+            ret = poll(&fd, 1, 5);
+            if (ret == 0) {
+                goto error;
+            } else if (ret == -1) {
+                if (errno != EINTR)
+                    goto error;
+            } else if (fd.revents != POLLIN) {
+                goto error;
+            }
+        } else {
+            got += ret;
+            buffer[got] = '\0';
+            if (strstr(buffer, "(qemu) ") != NULL) {
+                break;
+            }
+        }
+    }
+    vm->monitor = monfd;
+    return 0;
+
+ error:
+    close(monfd);
+    return -1;
+}
+
+static int qemudWaitForMonitor(struct qemud_vm *vm) {
+    char buffer[1024]; /* Plenty of space to get startup greeting */
+    int got = 0;
+
+    for (;;) {
+        int ret;
+
+        ret = read(vm->stderr, buffer+got, sizeof(buffer)-got-1);
+        if (ret == 0) {
+            return -1;
+        }
+        if (ret < 0) {
+            struct pollfd fd = { .fd = vm->stderr, .events = POLLIN };
+            if (errno != EAGAIN &&
+                errno != EINTR) {
+                return -1;
+            }
+
+            ret = poll(&fd, 1, 5000);
+            if (ret == 0) {
+                return -1;
+            } else if (ret < 0) {
+                if (errno != EINTR)
+                    return -1;
+            } else if (fd.revents != POLLIN) {
+                return -1;
+            }
+        } else {
+            char monitor[100];
+            char *tmp = buffer;
+            got += ret;
+            buffer[got] = '\0';
+            while (tmp && *tmp) {
+                if (sscanf(tmp, "char device redirected to %19s", monitor) == 1) {
+                    if (qemudOpenMonitor(vm, monitor) < 0)
+                        return -1;
+                    return 0;
+                }
+                tmp = index(tmp, '\n');
+                if (tmp && *tmp)
+                    tmp++;
+            }
+        }
+    }
+    return -1;
+}
+
+static int qemudNextFreeVNCPort(struct qemud_server *server ATTRIBUTE_UNUSED) {
+    int i;
+
+    for (i = 5900 ; i < 6000 ; i++) {
+        int fd;
+        struct sockaddr_in addr;
+        addr.sin_family = AF_INET;
+        addr.sin_port = htons(i);
+        addr.sin_addr.s_addr = htonl(INADDR_ANY);
+        fd = socket(PF_INET, SOCK_STREAM, 0);
+        if (fd < 0)
+            return -1;
+
+        if (connect(fd, (struct sockaddr*)&addr, sizeof(addr)) == 0) {
+            /* In use, try next */
+            close(fd);
+            continue;
+        }
+        close(fd);
+
+        if (errno == ECONNREFUSED) {
+            /* Not in use, lets grab it */
+            return i;
+        }
+
+        break;
+    }
+    return -1;
+}
+
 int qemudStartVMDaemon(struct qemud_server *server,
                        struct qemud_vm *vm) {
     char **argv = NULL;
@@ -626,9 +754,12 @@ int qemudStartVMDaemon(struct qemud_serv
         return -1;
     }
 
-    if (vm->def->vncPort < 0)
-        vm->def->vncActivePort = 5900 + server->nextvmid;
-    else
+    if (vm->def->vncPort < 0) {
+        int port = qemudNextFreeVNCPort(server);
+        if (port < 0)
+            return -1;
+        vm->def->vncActivePort = port;
+    } else
         vm->def->vncActivePort = vm->def->vncPort;
 
     if (qemudBuildCommandLine(server, vm, &argv) < 0)
@@ -636,12 +767,18 @@ int qemudStartVMDaemon(struct qemud_serv
 
     if (qemudExec(server, argv, &vm->pid, &vm->stdout, &vm->stderr) == 0) {
         vm->id = server->nextvmid++;
+        vm->state = QEMUD_STATE_RUNNING;
 
         server->ninactivevms--;
         server->nactivevms++;
         server->nvmfds += 2;
 
         ret = 0;
+
+        if (qemudWaitForMonitor(vm) < 0) {
+            qemudShutdownVMDaemon(server, vm);
+            ret = -1;
+        }
     }
 
     if (vm->tapfds) {
@@ -804,50 +941,6 @@ static int qemudVMData(struct qemud_serv
         }
         buf[ret] = '\0';
 
-        /*
-         * XXX this is bad - we should wait for tty and open the
-         * monitor when actually starting the guest, so we can
-         * reliably trap startup failures
-         */
-        if (vm->monitor == -1) {
-            char monitor[20];
-            /* Fairly lame assuming we receive the data all in one chunk.
-               This isn't guarenteed, but in practice it seems good enough.
-               This will probably bite me in the future.... */
-            if (sscanf(buf, "char device redirected to %19s", monitor) == 1) {
-                int monfd;
-
-                if (!(monfd = open(monitor, O_RDWR))) {
-                    perror("cannot open monitor");
-                    return -1;
-                }
-                if (qemudSetCloseExec(monfd) < 0) {
-                    close(monfd);
-                    return -1;
-                }
-                if (qemudSetNonBlock(monfd) < 0) {
-                    close(monfd);
-                    return -1;
-                }
-
-                /* Consume & discard the initial greeting */
-                /* XXX this is broken, we need to block until
-                   we see the initial prompt to ensure startup
-                   has completed */
-                for(;;) {
-                    char line[1024];
-                    if (read(monfd, line, sizeof(line)) < 0) {
-                        if (errno == EAGAIN) {
-                            break;
-                        }
-                        close(monfd);
-                        return -1;
-                    }
-                    qemudDebug("[%s]", line);
-                }
-                vm->monitor = monfd;
-            }
-        }
         qemudDebug("[%s]", buf);
     }
 }
@@ -896,6 +989,7 @@ int qemudShutdownVMDaemon(struct qemud_s
 
     vm->pid = -1;
     vm->id = -1;
+    vm->state = QEMUD_STATE_STOPPED;
 
     if (vm->newDef) {
         qemudFreeVMDef(vm->def);
@@ -1305,30 +1399,6 @@ static int qemudDispatchPoll(struct qemu
     if (server->shutdown)
         return 0;
 
-    while (sock) {
-        struct qemud_socket *next = sock->next;
-        /* FIXME: the daemon shouldn't exit on error here */
-        if (fds[fd].revents)
-            if (qemudDispatchServer(server, sock) < 0)
-                return -1;
-        fd++;
-        sock = next;
-    }
-
-    while (client) {
-        struct qemud_client *next = client->next;
-        if (fds[fd].revents) {
-            qemudDebug("Poll data normal");
-            if (fds[fd].revents == POLLOUT)
-                qemudDispatchClientWrite(server, client);
-            else if (fds[fd].revents == POLLIN)
-                qemudDispatchClientRead(server, client);
-            else
-                qemudDispatchClientFailure(server, client);
-        }
-        fd++;
-        client = next;
-    }
     vm = server->vms;
     while (vm) {
         struct qemud_vm *next = vm->next;
@@ -1356,6 +1426,7 @@ static int qemudDispatchPoll(struct qemu
         if (stderrfd != -1) {
             if (!failed) {
                 if (fds[fd].revents) {
+                    printf("%d\n", fds[fd].revents);
                     if (fds[fd].revents == POLLIN) {
                         if (qemudDispatchVMLog(server, vm, fds[fd].fd) < 0)
                             failed = 1;
@@ -1371,6 +1442,29 @@ static int qemudDispatchPoll(struct qemu
         if (failed)
             ret = -1; /* FIXME: the daemon shouldn't exit on failure here */
     }
+    while (client) {
+        struct qemud_client *next = client->next;
+        if (fds[fd].revents) {
+            qemudDebug("Poll data normal");
+            if (fds[fd].revents == POLLOUT)
+                qemudDispatchClientWrite(server, client);
+            else if (fds[fd].revents == POLLIN)
+                qemudDispatchClientRead(server, client);
+            else
+                qemudDispatchClientFailure(server, client);
+        }
+        fd++;
+        client = next;
+    }
+    while (sock) {
+        struct qemud_socket *next = sock->next;
+        /* FIXME: the daemon shouldn't exit on error here */
+        if (fds[fd].revents)
+            if (qemudDispatchServer(server, sock) < 0)
+                return -1;
+        fd++;
+        sock = next;
+    }
 
     /* Cleanup any VMs which shutdown & dont have an associated
        config file */
@@ -1408,22 +1502,6 @@ static void qemudPreparePoll(struct qemu
     fds[fd].events = POLLIN;
     fd++;
 
-    for (sock = server->sockets ; sock ; sock = sock->next) {
-        fds[fd].fd = sock->fd;
-        fds[fd].events = POLLIN;
-        fd++;
-    }
-
-    for (client = server->clients ; client ; client = client->next) {
-        fds[fd].fd = client->fd;
-        /* Refuse to read more from client if tx is pending to
-           rate limit */
-        if (client->tx)
-            fds[fd].events = POLLOUT | POLLERR | POLLHUP;
-        else
-            fds[fd].events = POLLIN | POLLERR | POLLHUP;
-        fd++;
-    }
     for (vm = server->vms ; vm ; vm = vm->next) {
         if (!qemudIsActiveVM(vm))
             continue;
@@ -1438,6 +1516,21 @@ static void qemudPreparePoll(struct qemu
             fd++;
         }
     }
+    for (client = server->clients ; client ; client = client->next) {
+        fds[fd].fd = client->fd;
+        /* Refuse to read more from client if tx is pending to
+           rate limit */
+        if (client->tx)
+            fds[fd].events = POLLOUT | POLLERR | POLLHUP;
+        else
+            fds[fd].events = POLLIN | POLLERR | POLLHUP;
+        fd++;
+    }
+    for (sock = server->sockets ; sock ; sock = sock->next) {
+        fds[fd].fd = sock->fd;
+        fds[fd].events = POLLIN;
+        fd++;
+    }
 }
 
 


More information about the libvir-list mailing list