[libvirt] [PATCHv2 11/16] save: let iohelper work on O_DIRECT fds

Eric Blake eblake at redhat.com
Wed Jul 20 04:20:34 UTC 2011


Required for a coming patch where iohelper will operate on O_DIRECT
fds.  There, the user-space memory must be aligned to file system
boundaries (at least 512, but using page-aligned works better, and
some file systems prefer 64k).  Made tougher by the fact that
VIR_ALLOC won't work on void *, but posix_memalign won't work on
char * and isn't available everywhere.

This patch makes some simplifying assumptions - namely, output
to an O_DIRECT fd will only be attempted on an empty seekable
file (hence, no need to worry about preserving existing data
on a partial block, and ftruncate will work to undo the effects
of having to round up the size of the last block written), and
input from an O_DIRECT fd will only be attempted on a complete
seekable file with the only possible short read at EOF.

* configure.ac (AC_CHECK_FUNCS_ONCE): Check for posix_memalign.
* src/util/iohelper.c (runIO): Use aligned memory, and handle
quirks of O_DIRECT on last write.
---

v2: merge patch 6 and 15 of v1.
Sorry, I didn't add a testsuite of this yet, but agree that
it would be a nice project

 configure.ac        |    6 ++--
 src/util/iohelper.c |   52 ++++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 52 insertions(+), 6 deletions(-)

diff --git a/configure.ac b/configure.ac
index e9d5be4..9e39f44 100644
--- a/configure.ac
+++ b/configure.ac
@@ -121,9 +121,9 @@ AC_CHECK_SIZEOF([long])

 dnl Availability of various common functions (non-fatal if missing),
 dnl and various less common threadsafe functions
-AC_CHECK_FUNCS_ONCE([cfmakeraw regexec sched_getaffinity getuid getgid \
- geteuid initgroups posix_fallocate mmap kill \
- getmntent_r getgrnam_r getpwuid_r])
+AC_CHECK_FUNCS_ONCE([cfmakeraw geteuid getgid getgrnam_r getmntent_r \
+  getpwuid_r getuid initgroups kill mmap posix_fallocate posix_memalign \
+  regexec sched_getaffinity])

 dnl Availability of pthread functions (if missing, win32 threading is
 dnl assumed).  Because of $LIB_PTHREAD, we cannot use AC_CHECK_FUNCS_ONCE.
diff --git a/src/util/iohelper.c b/src/util/iohelper.c
index 502bce5..9e7bbde 100644
--- a/src/util/iohelper.c
+++ b/src/util/iohelper.c
@@ -74,17 +74,32 @@ cleanup:
 static int
 runIO(const char *path, int fd, int oflags, unsigned long long length)
 {
-    char *buf = NULL;
+    void *base = NULL; /* Location to be freed */
+    char *buf = NULL; /* Aligned location within base */
     size_t buflen = 1024*1024;
+    intptr_t alignMask = 64*1024 - 1;
     int ret = -1;
     int fdin, fdout;
     const char *fdinname, *fdoutname;
     unsigned long long total = 0;
+    bool direct = O_DIRECT && ((oflags & O_DIRECT) != 0);
+    bool shortRead = false; /* true if we hit a short read */
+    off_t end = 0;

-    if (VIR_ALLOC_N(buf, buflen) < 0) {
+#if HAVE_POSIX_MEMALIGN
+    if (posix_memalign(&base, alignMask + 1, buflen)) {
         virReportOOMError();
         goto cleanup;
     }
+    buf = base;
+#else
+    if (VIR_ALLOC_N(buf, buflen + alignMask) < 0) {
+        virReportOOMError();
+        goto cleanup;
+    }
+    base = buf;
+    buf = (char *) (((intptr_t) base + alignMask) & alignMask);
+#endif

     switch (oflags & O_ACCMODE) {
     case O_RDONLY:
@@ -92,12 +107,26 @@ runIO(const char *path, int fd, int oflags, unsigned long long length)
         fdinname = path;
         fdout = STDOUT_FILENO;
         fdoutname = "stdout";
+        /* To make the implementation simpler, we give up on any
+         * attempt to use O_DIRECT in a non-trivial manner.  */
+        if (direct && ((end = lseek(fd, 0, SEEK_CUR)) != 0 || length)) {
+            virReportSystemError(end < 0 ? errno : EINVAL, "%s",
+                                 _("O_DIRECT read needs entire seekable file"));
+            goto cleanup;
+        }
         break;
     case O_WRONLY:
         fdin = STDIN_FILENO;
         fdinname = "stdin";
         fdout = fd;
         fdoutname = path;
+        /* To make the implementation simpler, we give up on any
+         * attempt to use O_DIRECT in a non-trivial manner.  */
+        if (direct && (end = lseek(fd, 0, SEEK_END)) != 0) {
+            virReportSystemError(end < 0 ? errno : EINVAL, "%s",
+                                 _("O_DIRECT write needs empty seekable file"));
+            goto cleanup;
+        }
         break;

     case O_RDWR:
@@ -124,12 +153,29 @@ runIO(const char *path, int fd, int oflags, unsigned long long length)
         }
         if (got == 0)
             break; /* End of file before end of requested data */
+        if (got < buflen || (buflen & alignMask)) {
+            /* O_DIRECT can handle at most one short read, at end of file */
+            if (direct && shortRead) {
+                virReportSystemError(EINVAL, "%s",
+                                     _("Too many short reads for O_DIRECT"));
+            }
+            shortRead = true;
+        }

         total += got;
+        if (fdout == fd && direct && shortRead) {
+            end = total;
+            memset(buf + got, 0, buflen - got);
+            got = (got + alignMask) & ~alignMask;
+        }
         if (safewrite(fdout, buf, got) < 0) {
             virReportSystemError(errno, _("Unable to write %s"), fdoutname);
             goto cleanup;
         }
+        if (end && ftruncate(fd, end) < 0) {
+            virReportSystemError(errno, _("Unable to truncate %s"), fdoutname);
+            goto cleanup;
+        }
     }

     ret = 0;
@@ -141,7 +187,7 @@ cleanup:
         ret = -1;
     }

-    VIR_FREE(buf);
+    VIR_FREE(base);
     return ret;
 }

-- 
1.7.4.4




More information about the libvir-list mailing list