[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

[libvirt] [PATCHv2] unlock eventLoop before calling callback function



From: Wen Congyang <wency cn fujitsu com>

When I use the newest libvirt to save a domain, libvirtd deadlocks.
Here is the output of gdb:
(gdb) thread 3
[Switching to thread 3 (Thread 0x7f972a1fc710 (LWP 30265))]#0  0x000000351fe0e034 in __lll_lock_wait () from /lib64/libpthread.so.0
(gdb) bt
    at qemu/qemu_driver.c:2074
    ret=0x7f972a1fbbe0) at remote.c:2273
(gdb) thread 7
[Switching to thread 7 (Thread 0x7f9730bcd710 (LWP 30261))]#0  0x000000351fe0e034 in __lll_lock_wait () from /lib64/libpthread.so.0
(gdb) bt
(gdb) p *(virMutexPtr)0x6fdd60
$2 = {lock = {__data = {__lock = 2, __count = 0, __owner = 30261, __nusers = 1, __kind = 0, __spins = 0, __list = {__prev = 0x0, __next = 0x0}},
    __size = "\002\000\000\000\000\000\000\000\065v\000\000\001", '\000' <repeats 26 times>, __align = 2}}
(gdb) p *(virMutexPtr)0x1a63ac0
$3 = {lock = {__data = {__lock = 2, __count = 0, __owner = 30265, __nusers = 1, __kind = 0, __spins = 0, __list = {__prev = 0x0, __next = 0x0}},
    __size = "\002\000\000\000\000\000\000\000\071v\000\000\001", '\000' <repeats 26 times>, __align = 2}}
(gdb) info threads
  7 Thread 0x7f9730bcd710 (LWP 30261)  0x000000351fe0e034 in __lll_lock_wait () from /lib64/libpthread.so.0
  6 Thread 0x7f972bfff710 (LWP 30262)  0x000000351fe0b43c in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
  5 Thread 0x7f972b5fe710 (LWP 30263)  0x000000351fe0b43c in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
  4 Thread 0x7f972abfd710 (LWP 30264)  0x000000351fe0b43c in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
* 3 Thread 0x7f972a1fc710 (LWP 30265)  0x000000351fe0e034 in __lll_lock_wait () from /lib64/libpthread.so.0
  2 Thread 0x7f97297fb710 (LWP 30266)  0x000000351fe0b43c in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
  1 Thread 0x7f9737aac800 (LWP 30260)  0x000000351fe0803d in pthread_join () from /lib64/libpthread.so.0

The reason is that the callback function tries to lock some object, while another thread may call the event API while holding the lock on that same object.
In the function virEventDispatchHandles(), we unlock eventLoop before calling the callback function. I think we should
do the same thing in the functions virEventCleanupTimeouts() and virEventCleanupHandles().

Signed-off-by: Wen Congyang <wency cn fujitsu com>
Signed-off-by: Eric Blake <eblake redhat com>
---

v2: incorporate comments from reviewers, and rebase on top of file move

I tested that this avoids the deadlock in my 'virsh save' case,
for which I reported a failure last week.

 src/util/event_poll.c |   27 +++++++++++++++++++--------
 1 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/src/util/event_poll.c b/src/util/event_poll.c
index dd83fc3..91000e2 100644
--- a/src/util/event_poll.c
+++ b/src/util/event_poll.c
@@ -354,7 +354,7 @@ static struct pollfd *virEventPollMakePollFDs(int *nfds) {

     *nfds = 0;
     for (i = 0 ; i < eventLoop.handlesCount ; i++) {
-        if (eventLoop.handles[i].events)
+        if (eventLoop.handles[i].events && !eventLoop.handles[i].deleted)
             (*nfds)++;
     }

@@ -366,11 +366,12 @@ static struct pollfd *virEventPollMakePollFDs(int *nfds) {

     *nfds = 0;
     for (i = 0 ; i < eventLoop.handlesCount ; i++) {
-        EVENT_DEBUG("Prepare n=%d w=%d, f=%d e=%d", i,
+        EVENT_DEBUG("Prepare n=%d w=%d, f=%d e=%d d=%d", i,
                     eventLoop.handles[i].watch,
                     eventLoop.handles[i].fd,
-                    eventLoop.handles[i].events);
-        if (!eventLoop.handles[i].events)
+                    eventLoop.handles[i].events,
+                    eventLoop.handles[i].deleted);
+        if (!eventLoop.handles[i].events || eventLoop.handles[i].deleted)
             continue;
         fds[*nfds].fd = eventLoop.handles[i].fd;
         fds[*nfds].events = eventLoop.handles[i].events;
@@ -506,8 +507,13 @@ static void virEventPollCleanupTimeouts(void) {

         EVENT_DEBUG("Purging timeout %d with id %d", i,
                     eventLoop.timeouts[i].timer);
-        if (eventLoop.timeouts[i].ff)
-            (eventLoop.timeouts[i].ff)(eventLoop.timeouts[i].opaque);
+        if (eventLoop.timeouts[i].ff) {
+            virFreeCallback ff = eventLoop.timeouts[i].ff;
+            void *opaque = eventLoop.timeouts[i].opaque;
+            virMutexUnlock(&eventLoop.lock);
+            ff(opaque);
+            virMutexLock(&eventLoop.lock);
+        }

         if ((i+1) < eventLoop.timeoutsCount) {
             memmove(eventLoop.timeouts+i,
@@ -546,8 +552,13 @@ static void virEventPollCleanupHandles(void) {
             continue;
         }

-        if (eventLoop.handles[i].ff)
-            (eventLoop.handles[i].ff)(eventLoop.handles[i].opaque);
+        if (eventLoop.handles[i].ff) {
+            virFreeCallback ff = eventLoop.handles[i].ff;
+            void *opaque = eventLoop.handles[i].opaque;
+            virMutexUnlock(&eventLoop.lock);
+            ff(opaque);
+            virMutexLock(&eventLoop.lock);
+        }

         if ((i+1) < eventLoop.handlesCount) {
             memmove(eventLoop.handles+i,
-- 
1.7.4


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]