[libvirt] PATCH: Prevent libvirtd 100% CPU usage when --timeout is used

Daniel P. Berrange berrange at redhat.com
Wed Feb 4 15:10:04 UTC 2009


On Wed, Feb 04, 2009 at 02:46:58PM +0000, Daniel P. Berrange wrote:
> A couple of bugs conspired to make libvirtd use 100% CPU usage when the
> --timeout option is used. This is the default for qemu:///session instance
> which are auto-spawned, so quite annoying!
> 
> eg   libvirt --timeout=30
> 
> 
>  - The qemudRunLoop() method was registering a timeout on every
>    iteration of the loop, when it considered itself inactive,
>    even if already registered
>  - virEventInteruptLocked() was looking at eventLoop.leader
>    and comparing to pthread_self() before anyone used the 
>    event loop. 'leader' was 0 at this point so it thought it
>    had to wake someone up even though no one was waiting in
>    the event loop.
> 
> The latter bug conspired with the former, so the act of registering
> the timeout, caused it to immediately see a wakeup signal on the
> following poll. So it did another loop iteration, adding another
> timer, getting woken up again, etc 100% cpu follows.
> 
> Another minor problem was that when completing an event loop
> iteration, we reset eventloop.leader to '0'. It is not safe to
> assume that pthread_t is an integer like this. The solution is
> to track when something is using the event loop explicitly, and
> not rely on the 'leader' variable, unless we know a thread is
> active.
> 
> Finally we never de-registered the shutdown timeout if a new
> client turned up while we were counting down.
> 
> This patch addresses all these flaws

Opps, forgot to remove one chunk of redundant code that'd totally 
unregister the timer, when we only want to deactivate it now.

Daniel

Index: qemud/event.c
===================================================================
RCS file: /data/cvs/libvirt/qemud/event.c,v
retrieving revision 1.17
diff -u -p -u -r1.17 event.c
--- qemud/event.c	22 Dec 2008 12:55:47 -0000	1.17
+++ qemud/event.c	4 Feb 2009 15:08:06 -0000
@@ -68,6 +68,7 @@ struct virEventTimeout {
 /* State for the main event loop */
 struct virEventLoop {
     pthread_mutex_t lock;
+    int running;
     pthread_t leader;
     int wakeupfd[2];
     int handlesCount;
@@ -521,6 +522,7 @@ int virEventRunOnce(void) {
     int ret, timeout, nfds;
 
     virEventLock();
+    eventLoop.running = 1;
     eventLoop.leader = pthread_self();
     if ((nfds = virEventMakePollFDs(&fds)) < 0) {
         virEventUnlock();
@@ -572,7 +574,7 @@ int virEventRunOnce(void) {
         return -1;
     }
 
-    eventLoop.leader = 0;
+    eventLoop.running = 0;
     virEventUnlock();
     return 0;
 }
@@ -611,7 +613,9 @@ int virEventInit(void)
 static int virEventInterruptLocked(void)
 {
     char c = '\0';
-    if (pthread_self() == eventLoop.leader)
+
+    if (!eventLoop.running ||
+        pthread_self() == eventLoop.leader)
         return 0;
 
     if (safewrite(eventLoop.wakeupfd[1], &c, sizeof(c)) != sizeof(c))
Index: qemud/qemud.c
===================================================================
RCS file: /data/cvs/libvirt/qemud/qemud.c,v
retrieving revision 1.138
diff -u -p -u -r1.138 qemud.c
--- qemud/qemud.c	28 Jan 2009 11:31:39 -0000	1.138
+++ qemud/qemud.c	4 Feb 2009 15:08:06 -0000
@@ -2013,11 +2013,15 @@ static int qemudOneLoop(void) {
     return 0;
 }
 
-static void qemudInactiveTimer(int timer ATTRIBUTE_UNUSED, void *data) {
+static void qemudInactiveTimer(int timerid, void *data) {
     struct qemud_server *server = (struct qemud_server *)data;
-    DEBUG0("Got inactive timer expiry");
-    if (!virStateActive()) {
-        DEBUG0("No state active, shutting down");
+
+    if (virStateActive() ||
+        server->clients) {
+        DEBUG0("Timer expired but still active, not shutting down");
+        virEventUpdateTimeoutImpl(timerid, -1);
+    } else {
+        DEBUG0("Timer expired and inactive, shutting down");
         server->shutdown = 1;
     }
 }
@@ -2048,9 +2052,18 @@ static void qemudFreeClient(struct qemud
 static int qemudRunLoop(struct qemud_server *server) {
     int timerid = -1;
     int ret = -1, i;
+    int timerActive = 0;
 
     virMutexLock(&server->lock);
 
+    if (timeout > 0 &&
+        (timerid = virEventAddTimeoutImpl(-1,
+                                          qemudInactiveTimer,
+                                          server, NULL)) < 0) {
+        VIR_ERROR0(_("Failed to register shutdown timeout"));
+        return -1;
+    }
+
     if (min_workers > max_workers)
         max_workers = min_workers;
 
@@ -2071,11 +2084,21 @@ static int qemudRunLoop(struct qemud_ser
          * if any drivers have active state, if not
          * shutdown after timeout seconds
          */
-        if (timeout > 0 && !virStateActive() && !server->clients) {
-            timerid = virEventAddTimeoutImpl(timeout*1000,
-                                             qemudInactiveTimer,
-                                             server, NULL);
-            DEBUG("Scheduling shutdown timer %d", timerid);
+        if (timeout > 0) {
+            if (timerActive) {
+                if (server->clients) {
+                    DEBUG("Deactivating shutdown timer %d", timerid);
+                    virEventUpdateTimeoutImpl(timerid, -1);
+                    timerActive = 0;
+                }
+            } else {
+                if (!virStateActive() &&
+                    !server->clients) {
+                    DEBUG("Activating shutdown timer %d", timerid);
+                    virEventUpdateTimeoutImpl(timerid, timeout * 1000);
+                    timerActive = 1;
+                }
+            }
         }
 
         virMutexUnlock(&server->lock);
@@ -2129,15 +2152,6 @@ static int qemudRunLoop(struct qemud_ser
             }
         }
 
-        /* Unregister any timeout that's active, since we
-         * just had an event processed
-         */
-        if (timerid != -1) {
-            DEBUG("Removing shutdown timer %d", timerid);
-            virEventRemoveTimeoutImpl(timerid);
-            timerid = -1;
-        }
-
         if (server->shutdown) {
             ret = 0;
             break;


-- 
|: Red Hat, Engineering, London   -o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org  -o-  http://virt-manager.org  -o-  http://ovirt.org :|
|: http://autobuild.org       -o-         http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-  F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|




More information about the libvir-list mailing list