[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

Re: [libvirt] [RFC] Substitute poll by epoll in virEventPollRunOnce



FlameGraphs show that on small machine that can run 35 VMs version with epoll has 20% less perf counters.

2/10/2017 4:29 PM, Derbyshev Dmitriy пишет:
From: Derbyshev Dmitry <dderbyshev virtuozzo com>

This makes it possible to avoid allocations in
virEventPollMakePollFDs, as well as generally reduces time spent in
kernel if handles remain the same, which should generally be true.

__________________________________________

Questions:
1) What to do with probe in virEventPollRunOnce?
2) Are ifdef an acceptable solution to epoll avaliability issues?
3) Is using MAX_POLL_EVENTS_AT_ONCE constant a valid solution?
4) some fds are sometimes added twice, but only once with events!=0? This complicates virEventPoll*Handle fuctions. At the very least, it is called twice in _dbus_watch_list_set_functions.

Signed-off-by: Derbyshev Dmitry <dderbyshev virtuozzo com>
---
  configure.ac                     |  26 +++++
  src/util/vireventpoll.c          | 205 ++++++++++++++++++++++++++++++++++++---
  src/util/vireventpoll.h          |   5 +
  tests/commanddata/test3epoll.log |  15 +++
  tests/commandtest.c              |   4 +
  5 files changed, 242 insertions(+), 13 deletions(-)
  create mode 100644 tests/commanddata/test3epoll.log

diff --git a/configure.ac b/configure.ac
index dfc536f..c985ccc 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2695,6 +2695,32 @@ AC_DEFINE_UNQUOTED([isbase64],[libvirt_gl_isbase64],[Hack to avoid symbol clash]
  AC_DEFINE_UNQUOTED([base64_encode],[libvirt_gl_base64_encode],[Hack to avoid symbol clash])
  AC_DEFINE_UNQUOTED([base64_encode_alloc],[libvirt_gl_base64_encode_alloc],[Hack to avoid symbol clash])
+AC_ARG_ENABLE([epoll],
+              [AS_HELP_STRING([--enable-epoll],[use epoll(4) on Linux])],
+              [enable_epoll=$enableval], [enable_epoll=auto])
+if test x$enable_epoll = xno; then
+    have_linux_epoll=no
+else
+    AC_MSG_CHECKING([for Linux epoll(4)])
+    AC_LINK_IFELSE([AC_LANG_PROGRAM(
+        [
+        #ifndef __linux__
+        #error This is not Linux
+        #endif
+        #include <sys/epoll.h>
+        ],
+        [epoll_create1 (EPOLL_CLOEXEC);])],
+        [have_linux_epoll=yes],
+        [have_linux_epoll=no])
+    AC_MSG_RESULT([$have_linux_epoll])
+fi
+if test x$enable_epoll,$have_linux_epoll = xyes,no; then
+    AC_MSG_ERROR([epoll support explicitly enabled but not available])
+fi
+if test "x$have_linux_epoll" = xyes; then
+  AC_DEFINE_UNQUOTED([HAVE_LINUX_EPOLL], 1, [whether epoll is supported])
+fi
+
  AC_CONFIG_FILES([run],
                  [chmod +x,-w run])
  AC_CONFIG_FILES([\
diff --git a/src/util/vireventpoll.c b/src/util/vireventpoll.c
index 81ecab4..1541706 100644
--- a/src/util/vireventpoll.c
+++ b/src/util/vireventpoll.c
@@ -25,7 +25,11 @@
#include <stdlib.h>
  #include <string.h>
-#include <poll.h>
+#ifdef HAVE_LINUX_EPOLL
+# include <sys/epoll.h>
+#else
+# include <poll.h>
+#endif
  #include <sys/time.h>
  #include <errno.h>
  #include <unistd.h>
@@ -87,6 +91,9 @@ struct virEventPollLoop {
      size_t timeoutsCount;
      size_t timeoutsAlloc;
      struct virEventPollTimeout *timeouts;
+#ifdef HAVE_LINUX_EPOLL
+    int epollfd;
+#endif
  };
/* Only have one event loop */
@@ -109,6 +116,11 @@ int virEventPollAddHandle(int fd, int events,
                            virFreeCallback ff)
  {
      int watch;
+#ifdef HAVE_LINUX_EPOLL
+    size_t i;
+    struct epoll_event ev;
+#endif
+    int native = virEventPollToNativeEvents(events);
      virMutexLock(&eventLoop.lock);
      if (eventLoop.handlesCount == eventLoop.handlesAlloc) {
          EVENT_DEBUG("Used %zu handle slots, adding at least %d more",
@@ -124,8 +136,7 @@ int virEventPollAddHandle(int fd, int events,
eventLoop.handles[eventLoop.handlesCount].watch = watch;
      eventLoop.handles[eventLoop.handlesCount].fd = fd;
-    eventLoop.handles[eventLoop.handlesCount].events =
-                                         virEventPollToNativeEvents(events);
+    eventLoop.handles[eventLoop.handlesCount].events = native;
      eventLoop.handles[eventLoop.handlesCount].cb = cb;
      eventLoop.handles[eventLoop.handlesCount].ff = ff;
      eventLoop.handles[eventLoop.handlesCount].opaque = opaque;
@@ -133,7 +144,30 @@ int virEventPollAddHandle(int fd, int events,
eventLoop.handlesCount++; +#ifdef HAVE_LINUX_EPOLL
+    ev.events = native;
+    ev.data.fd = fd;
+    if (epoll_ctl(eventLoop.epollfd, EPOLL_CTL_ADD, fd, &ev) < 0) {
+        if (errno == EEXIST) {
+            for (i = 0; i < eventLoop.handlesCount; i++) {
+                if (eventLoop.handles[i].fd == fd &&
+                    !eventLoop.handles[i].deleted) {
+                    ev.events |= eventLoop.handles[i].events;
+                }
+            }
+            if (epoll_ctl(eventLoop.epollfd, EPOLL_CTL_MOD, fd, &ev) < 0) {
+                virMutexUnlock(&eventLoop.lock);
+                return -1;
+            }
+        }
+        else {
+            virMutexUnlock(&eventLoop.lock);
+            return -1;
+        }
+    }
+#else
      virEventPollInterruptLocked();
+#endif
PROBE(EVENT_POLL_ADD_HANDLE,
            "watch=%d fd=%d events=%d cb=%p opaque=%p ff=%p",
@@ -145,8 +179,14 @@ int virEventPollAddHandle(int fd, int events,
void virEventPollUpdateHandle(int watch, int events)
  {
+#ifdef HAVE_LINUX_EPOLL
+    struct epoll_event ev;
+    size_t i, j;
+#else
      size_t i;
+#endif
      bool found = false;
+    int native = virEventPollToNativeEvents(events);
      PROBE(EVENT_POLL_UPDATE_HANDLE,
            "watch=%d events=%d",
            watch, events);
@@ -159,13 +199,34 @@ void virEventPollUpdateHandle(int watch, int events)
      virMutexLock(&eventLoop.lock);
      for (i = 0; i < eventLoop.handlesCount; i++) {
          if (eventLoop.handles[i].watch == watch) {
-            eventLoop.handles[i].events =
-                    virEventPollToNativeEvents(events);
+            eventLoop.handles[i].events = native;
+#ifndef HAVE_LINUX_EPOLL
              virEventPollInterruptLocked();
+#endif
              found = true;
              break;
          }
      }
+
+#ifdef HAVE_LINUX_EPOLL
+    ev.events = native;
+    for (j = 0; j < eventLoop.handlesCount; j++) {
+        if (eventLoop.handles[i].fd == eventLoop.handles[i].fd &&
+            !eventLoop.handles[i].deleted &&
+            i != j) {
+            ev.events |= eventLoop.handles[i].events;
+        }
+    }
+    ev.data.fd = eventLoop.handles[i].fd;
+
+    if (found && epoll_ctl(eventLoop.epollfd, EPOLL_CTL_MOD,
+                           eventLoop.handles[i].fd, &ev) < 0) {
+        VIR_WARN("Update for existing handle watch %d failed", watch);
+        virMutexUnlock(&eventLoop.lock);
+        return;
+    }
+#endif
+
      virMutexUnlock(&eventLoop.lock);
if (!found)
@@ -180,7 +241,12 @@ void virEventPollUpdateHandle(int watch, int events)
   */
  int virEventPollRemoveHandle(int watch)
  {
+#ifdef HAVE_LINUX_EPOLL
+    struct epoll_event ev;
+    size_t i, j;
+#else
      size_t i;
+#endif
      PROBE(EVENT_POLL_REMOVE_HANDLE,
            "watch=%d",
            watch);
@@ -196,15 +262,47 @@ int virEventPollRemoveHandle(int watch)
              continue;
if (eventLoop.handles[i].watch == watch) {
-            EVENT_DEBUG("mark delete %zu %d", i, eventLoop.handles[i].fd);
-            eventLoop.handles[i].deleted = 1;
-            virEventPollInterruptLocked();
+            break;
+        }
+    }
+
+    if (i == eventLoop.handlesCount) {
+        virMutexUnlock(&eventLoop.lock);
+        return -1;
+    }
+
+#ifdef HAVE_LINUX_EPOLL
+    ev.events = 0;
+    for (j = 0; j < eventLoop.handlesCount; j++) {
+        if (eventLoop.handles[j].fd == eventLoop.handles[i].fd &&
+            !eventLoop.handles[j].deleted &&
+            i != j) {
+            ev.events |= eventLoop.handles[i].events;
+        }
+    }
+
+    ev.data.fd = eventLoop.handles[i].fd;
+    if (ev.events) {
+        if (epoll_ctl(eventLoop.epollfd, EPOLL_CTL_MOD, eventLoop.handles[i].fd,
+                      &ev) < 0) {
              virMutexUnlock(&eventLoop.lock);
-            return 0;
+            return -1;
          }
      }
+    else {
+        if (epoll_ctl(eventLoop.epollfd, EPOLL_CTL_DEL, eventLoop.handles[i].fd,
+                      &ev) < 0) {
+            virMutexUnlock(&eventLoop.lock);
+            return -1;
+        }
+    }
+#endif
+
+    EVENT_DEBUG("mark delete %zu %d", i, eventLoop.handles[i].fd);
+    eventLoop.handles[i].deleted = 1;
+    virEventPollInterruptLocked();
      virMutexUnlock(&eventLoop.lock);
-    return -1;
+    return 0;
  }
@@ -373,6 +471,7 @@ static int virEventPollCalculateTimeout(int *timeout)
      return 0;
  }
+#ifndef HAVE_LINUX_EPOLL
  /*
   * Allocate a pollfd array containing data for all registered
   * file handles. The caller must free the returned data struct
@@ -409,6 +508,7 @@ static struct pollfd *virEventPollMakePollFDs(int *nfds) {
return fds;
  }
+#endif
/*
@@ -472,7 +572,11 @@ static int virEventPollDispatchTimeouts(void)
   *
   * Returns 0 upon success, -1 if an error occurred
   */
+#ifdef HAVE_LINUX_EPOLL
+static int virEventPollDispatchHandles(int nfds, struct epoll_event *events)
+#else
  static int virEventPollDispatchHandles(int nfds, struct pollfd *fds)
+#endif
  {
      size_t i, n;
      VIR_DEBUG("Dispatch %d", nfds);
@@ -482,7 +586,11 @@ static int virEventPollDispatchHandles(int nfds, struct pollfd *fds)
       * in the fds array we've got */
      for (i = 0, n = 0; n < nfds && i < eventLoop.handlesCount; n++) {
          while (i < eventLoop.handlesCount &&
+#ifdef HAVE_LINUX_EPOLL
+               (eventLoop.handles[i].fd != events[n].data.fd ||
+#else
                 (eventLoop.handles[i].fd != fds[n].fd ||
+#endif
                  eventLoop.handles[i].events == 0)) {
              i++;
          }
@@ -496,16 +604,25 @@ static int virEventPollDispatchHandles(int nfds, struct pollfd *fds)
              continue;
          }
+#ifdef HAVE_LINUX_EPOLL
+        if (events[n].events) {
+            int hEvents = virEventPollFromNativeEvents(events[n].events);
+#else
          if (fds[n].revents) {
+            int hEvents = virEventPollFromNativeEvents(fds[n].revents);
+#endif
              virEventHandleCallback cb = eventLoop.handles[i].cb;
              int watch = eventLoop.handles[i].watch;
              void *opaque = eventLoop.handles[i].opaque;
-            int hEvents = virEventPollFromNativeEvents(fds[n].revents);
              PROBE(EVENT_POLL_DISPATCH_HANDLE,
                    "watch=%d events=%d",
                    watch, hEvents);
              virMutexUnlock(&eventLoop.lock);
+#ifdef HAVE_LINUX_EPOLL
+            (cb)(watch, events[n].data.fd, hEvents, opaque);
+#else
              (cb)(watch, fds[n].fd, hEvents, opaque);
+#endif
              virMutexLock(&eventLoop.lock);
          }
      }
@@ -618,7 +735,11 @@ static void virEventPollCleanupHandles(void)
   */
  int virEventPollRunOnce(void)
  {
+#ifdef HAVE_LINUX_EPOLL
+    struct epoll_event events[MAX_POLL_EVENTS_AT_ONCE];
+#else
      struct pollfd *fds = NULL;
+#endif
      int ret, timeout, nfds;
virMutexLock(&eventLoop.lock);
@@ -628,17 +749,29 @@ int virEventPollRunOnce(void)
      virEventPollCleanupTimeouts();
      virEventPollCleanupHandles();
- if (!(fds = virEventPollMakePollFDs(&nfds)) ||
-        virEventPollCalculateTimeout(&timeout) < 0)
+#ifndef HAVE_LINUX_EPOLL
+    if (!(fds = virEventPollMakePollFDs(&nfds)))
+        goto error;
+#endif
+    if (virEventPollCalculateTimeout(&timeout) < 0)
          goto error;
virMutexUnlock(&eventLoop.lock); retry:
+#ifdef HAVE_LINUX_EPOLL
+//FIXME: PROBE handles
+    PROBE(EVENT_POLL_RUN,
+          "nhandles=%d timeout=%d",
+          42, timeout);
+    nfds = ret = epoll_wait(eventLoop.epollfd, events,
+                     MAX_POLL_EVENTS_AT_ONCE, timeout);
+#else
      PROBE(EVENT_POLL_RUN,
            "nhandles=%d timeout=%d",
            nfds, timeout);
      ret = poll(fds, nfds, timeout);
+#endif
      if (ret < 0) {
          EVENT_DEBUG("Poll got error event %d", errno);
          if (errno == EINTR || errno == EAGAIN)
@@ -653,22 +786,32 @@ int virEventPollRunOnce(void)
      if (virEventPollDispatchTimeouts() < 0)
          goto error;
+#ifdef HAVE_LINUX_EPOLL
+    if (ret > 0 &&
+        virEventPollDispatchHandles(nfds, events) < 0)
+        goto error;
+#else
      if (ret > 0 &&
          virEventPollDispatchHandles(nfds, fds) < 0)
          goto error;
+#endif
virEventPollCleanupTimeouts();
      virEventPollCleanupHandles();
eventLoop.running = 0;
      virMutexUnlock(&eventLoop.lock);
+#ifndef HAVE_LINUX_EPOLL
      VIR_FREE(fds);
+#endif
      return 0;
error:
      virMutexUnlock(&eventLoop.lock);
   error_unlocked:
+#ifndef HAVE_LINUX_EPOLL
      VIR_FREE(fds);
+#endif
      return -1;
  }
@@ -698,6 +841,17 @@ int virEventPollInit(void)
          return -1;
      }
+#ifdef HAVE_LINUX_EPOLL
+    eventLoop.epollfd = epoll_create1(0);
+    if (eventLoop.epollfd < 0) {
+        virReportError(VIR_ERR_INTERNAL_ERROR,
+                       _("Unable to initialize epoll"));
+        VIR_FORCE_CLOSE(eventLoop.wakeupfd[0]);
+        VIR_FORCE_CLOSE(eventLoop.wakeupfd[1]);
+        return -1;
+    }
+#endif
+
      if (virEventPollAddHandle(eventLoop.wakeupfd[0],
                                VIR_EVENT_HANDLE_READABLE,
                                virEventPollHandleWakeup, NULL, NULL) < 0) {
@@ -706,6 +860,9 @@ int virEventPollInit(void)
                         eventLoop.wakeupfd[0]);
          VIR_FORCE_CLOSE(eventLoop.wakeupfd[0]);
          VIR_FORCE_CLOSE(eventLoop.wakeupfd[1]);
+#ifdef HAVE_LINUX_EPOLL
+        VIR_FORCE_CLOSE(eventLoop.epollfd);
+#endif
          return -1;
      }
@@ -742,6 +899,16 @@ int
  virEventPollToNativeEvents(int events)
  {
      int ret = 0;
+#ifdef HAVE_LINUX_EPOLL
+    if (events & VIR_EVENT_HANDLE_READABLE)
+        ret |= EPOLLIN;
+    if (events & VIR_EVENT_HANDLE_WRITABLE)
+        ret |= EPOLLOUT;
+    if (events & VIR_EVENT_HANDLE_ERROR)
+        ret |= EPOLLERR;
+    if (events & VIR_EVENT_HANDLE_HANGUP)
+        ret |= EPOLLHUP;
+#else
      if (events & VIR_EVENT_HANDLE_READABLE)
          ret |= POLLIN;
      if (events & VIR_EVENT_HANDLE_WRITABLE)
@@ -750,6 +917,7 @@ virEventPollToNativeEvents(int events)
          ret |= POLLERR;
      if (events & VIR_EVENT_HANDLE_HANGUP)
          ret |= POLLHUP;
+#endif
      return ret;
  }
@@ -757,6 +925,16 @@ int
  virEventPollFromNativeEvents(int events)
  {
      int ret = 0;
+#ifdef HAVE_LINUX_EPOLL
+    if (events & EPOLLIN)
+        ret |= VIR_EVENT_HANDLE_READABLE;
+    if (events & EPOLLOUT)
+        ret |= VIR_EVENT_HANDLE_WRITABLE;
+    if (events & EPOLLERR)
+        ret |= VIR_EVENT_HANDLE_ERROR;
+    if (events & EPOLLHUP)
+        ret |= VIR_EVENT_HANDLE_HANGUP;
+#else
      if (events & POLLIN)
          ret |= VIR_EVENT_HANDLE_READABLE;
      if (events & POLLOUT)
@@ -767,5 +945,6 @@ virEventPollFromNativeEvents(int events)
          ret |= VIR_EVENT_HANDLE_ERROR;
      if (events & POLLHUP)
          ret |= VIR_EVENT_HANDLE_HANGUP;
+#endif
      return ret;
  }
diff --git a/src/util/vireventpoll.h b/src/util/vireventpoll.h
index 8844c9c..172009a 100644
--- a/src/util/vireventpoll.h
+++ b/src/util/vireventpoll.h
@@ -26,6 +26,11 @@
# include "internal.h" +# ifdef HAVE_LINUX_EPOLL
+/* Maximum number of events that are returned by epoll in virEventPollRunOnce */
+#  define MAX_POLL_EVENTS_AT_ONCE 10
+# endif
+
  /**
   * virEventPollAddHandle: register a callback for monitoring file handle events
   *
diff --git a/tests/commanddata/test3epoll.log b/tests/commanddata/test3epoll.log
new file mode 100644
index 0000000..e4619b3
--- /dev/null
+++ b/tests/commanddata/test3epoll.log
@@ -0,0 +1,15 @@
+ENV:DISPLAY=:0.0
+ENV:HOME=/home/test
+ENV:HOSTNAME=test
+ENV:LANG=C
+ENV:LOGNAME=testTMPDIR=/tmp
+ENV:PATH=/usr/bin:/bin
+ENV:USER=test
+FD:0
+FD:1
+FD:2
+FD:6
+FD:8
+DAEMON:no
+CWD:/tmp
+UMASK:0022
diff --git a/tests/commandtest.c b/tests/commandtest.c
index 7bf5447..6b23659 100644
--- a/tests/commandtest.c
+++ b/tests/commandtest.c
@@ -227,7 +227,11 @@ static int test3(const void *unused ATTRIBUTE_UNUSED)
          goto cleanup;
      }
+#ifdef HAVE_LINUX_EPOLL
+    ret = checkoutput("test3epoll", NULL);
+#else
      ret = checkoutput("test3", NULL);
+#endif
cleanup:
      virCommandFree(cmd);

Attachment: local_epoll_wo_interrupt_disp_on_3600s_35vms.svg
Description: image/svg

Attachment: local_disp_on_3600s_35vms.svg
Description: image/svg


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]