[libvirt] [PATCH v5 6/6] block: Enable qemu_open/close to work with fd sets

Corey Bryant coreyb at linux.vnet.ibm.com
Mon Jul 23 13:08:05 UTC 2012


When qemu_open is passed a filename of the "/dev/fdset/nnn"
format (where nnn is the fdset ID), an fd with matching access
mode flags will be searched for within the specified monitor
fd set.  If the fd is found, a dup of the fd will be returned
from qemu_open.

Each fd set has a reference count.  The purpose of the reference
count is to determine if an fd set contains file descriptors that
have open dup() references that have not yet been closed.  It is
incremented on qemu_open and decremented on qemu_close.  It is
not until the refcount is zero that file desriptors in an fd set
can be closed.  If an fd set has dup() references open, then we
must keep the other fds in the fd set open in case a reopen
of the file occurs that requires an fd with a different access
mode.

Signed-off-by: Corey Bryant <coreyb at linux.vnet.ibm.com>

v2:
 -Get rid of file_open and move dup code to qemu_open
  (kwolf at redhat.com)
 -Use strtol wrapper instead of atoi (kwolf at redhat.com)

v3:
 -Add note about fd leakage (eblake at redhat.com)

v4
 -Moved patch to be later in series (lcapitulino at redhat.com)
 -Update qemu_open to check access mode flags and set flags that
  can be set (eblake at redhat.com, kwolf at redhat.com)

v5:
 -This patch was overhauled quite a bit in this version, with
  the addition of fd set and refcount support.
 -Use qemu_set_cloexec() on dup'd fd (eblake at redhat.com)
 -Modify flags set by fcntl on dup'd fd (eblake at redhat.com)
 -Reduce syscalls when setting flags for dup'd fd (eblake at redhat.com)
 -Fix O_RDWR, O_RDONLY, O_WRONLY checks (eblake at redhat.com)
---
 block/raw-posix.c |   24 +++++-----
 block/raw-win32.c |    2 +-
 block/vmdk.c      |    4 +-
 block/vpc.c       |    2 +-
 block/vvfat.c     |   12 ++---
 cutils.c          |    5 ++
 monitor.c         |   85 +++++++++++++++++++++++++++++++++
 monitor.h         |    4 ++
 osdep.c           |  138 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 qemu-common.h     |    3 +-
 qemu-tool.c       |   12 +++++
 11 files changed, 267 insertions(+), 24 deletions(-)

diff --git a/block/raw-posix.c b/block/raw-posix.c
index a172de3..5d0a801 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -271,7 +271,7 @@ static int raw_open_common(BlockDriverState *bs, const char *filename,
 out_free_buf:
     qemu_vfree(s->aligned_buf);
 out_close:
-    qemu_close(fd);
+    qemu_close(fd, filename);
     return -errno;
 }
 
@@ -376,7 +376,7 @@ static void raw_close(BlockDriverState *bs)
 {
     BDRVRawState *s = bs->opaque;
     if (s->fd >= 0) {
-        qemu_close(s->fd);
+        qemu_close(s->fd, bs->filename);
         s->fd = -1;
         if (s->aligned_buf != NULL)
             qemu_vfree(s->aligned_buf);
@@ -580,7 +580,7 @@ static int raw_create(const char *filename, QEMUOptionParameter *options)
         if (ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) {
             result = -errno;
         }
-        if (qemu_close(fd) != 0) {
+        if (qemu_close(fd, filename) != 0) {
             result = -errno;
         }
     }
@@ -850,7 +850,7 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
             if (fd < 0) {
                 bsdPath[strlen(bsdPath)-1] = '1';
             } else {
-                qemu_close(fd);
+                qemu_close(fd, bsdPath);
             }
             filename = bsdPath;
         }
@@ -889,7 +889,7 @@ static int fd_open(BlockDriverState *bs)
     last_media_present = (s->fd >= 0);
     if (s->fd >= 0 &&
         (get_clock() - s->fd_open_time) >= FD_OPEN_TIMEOUT) {
-        qemu_close(s->fd);
+        qemu_close(s->fd, bs->filename);
         s->fd = -1;
 #ifdef DEBUG_FLOPPY
         printf("Floppy closed\n");
@@ -988,7 +988,7 @@ static int hdev_create(const char *filename, QEMUOptionParameter *options)
     else if (lseek(fd, 0, SEEK_END) < total_size * BDRV_SECTOR_SIZE)
         ret = -ENOSPC;
 
-    qemu_close(fd);
+    qemu_close(fd, filename);
     return ret;
 }
 
@@ -1038,7 +1038,7 @@ static int floppy_open(BlockDriverState *bs, const char *filename, int flags)
         return ret;
 
     /* close fd so that we can reopen it as needed */
-    qemu_close(s->fd);
+    qemu_close(s->fd, filename);
     s->fd = -1;
     s->fd_media_changed = 1;
 
@@ -1070,7 +1070,7 @@ static int floppy_probe_device(const char *filename)
         prio = 100;
 
 outc:
-    qemu_close(fd);
+    qemu_close(fd, filename);
 out:
     return prio;
 }
@@ -1105,14 +1105,14 @@ static void floppy_eject(BlockDriverState *bs, bool eject_flag)
     int fd;
 
     if (s->fd >= 0) {
-        qemu_close(s->fd);
+        qemu_close(s->fd, bs->filename);
         s->fd = -1;
     }
     fd = qemu_open(bs->filename, s->open_flags | O_NONBLOCK);
     if (fd >= 0) {
         if (ioctl(fd, FDEJECT, 0) < 0)
             perror("FDEJECT");
-        qemu_close(fd);
+        qemu_close(fd, bs->filename);
     }
 }
 
@@ -1173,7 +1173,7 @@ static int cdrom_probe_device(const char *filename)
         prio = 100;
 
 outc:
-    qemu_close(fd);
+    qemu_close(fd, filename);
 out:
     return prio;
 }
@@ -1281,7 +1281,7 @@ static int cdrom_reopen(BlockDriverState *bs)
      * FreeBSD seems to not notice sometimes...
      */
     if (s->fd >= 0)
-        qemu_close(s->fd);
+        qemu_close(s->fd, bs->filename);
     fd = qemu_open(bs->filename, s->open_flags, 0644);
     if (fd < 0) {
         s->fd = -1;
diff --git a/block/raw-win32.c b/block/raw-win32.c
index c56bf83..b795871 100644
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -261,7 +261,7 @@ static int raw_create(const char *filename, QEMUOptionParameter *options)
         return -EIO;
     set_sparse(fd);
     ftruncate(fd, total_size * 512);
-    qemu_close(fd);
+    qemu_close(fd, filename);
     return 0;
 }
 
diff --git a/block/vmdk.c b/block/vmdk.c
index daee426..7f1206b 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -1258,7 +1258,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
 
     ret = 0;
  exit:
-    qemu_close(fd);
+    qemu_close(fd, filename);
     return ret;
 }
 
@@ -1506,7 +1506,7 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options)
     }
     ret = 0;
 exit:
-    qemu_close(fd);
+    qemu_close(fd, filename);
     return ret;
 }
 
diff --git a/block/vpc.c b/block/vpc.c
index c0b82c4..20f648a 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -744,7 +744,7 @@ static int vpc_create(const char *filename, QEMUOptionParameter *options)
     }
 
  fail:
-    qemu_close(fd);
+    qemu_close(fd, filename);
     return ret;
 }
 
diff --git a/block/vvfat.c b/block/vvfat.c
index 59d3c5b..cbc7543 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -1105,7 +1105,7 @@ static inline void vvfat_close_current_file(BDRVVVFATState *s)
     if(s->current_mapping) {
 	s->current_mapping = NULL;
 	if (s->current_fd) {
-		qemu_close(s->current_fd);
+		qemu_close(s->current_fd, s->current_mapping->path);
 		s->current_fd = 0;
 	}
     }
@@ -2230,7 +2230,7 @@ static int commit_one_file(BDRVVVFATState* s,
     }
     if (offset > 0) {
         if (lseek(fd, offset, SEEK_SET) != offset) {
-            qemu_close(fd);
+            qemu_close(fd, mapping->path);
             g_free(cluster);
             return -3;
         }
@@ -2251,13 +2251,13 @@ static int commit_one_file(BDRVVVFATState* s,
 	    (uint8_t*)cluster, (rest_size + 0x1ff) / 0x200);
 
         if (ret < 0) {
-            qemu_close(fd);
+            qemu_close(fd, mapping->path);
             g_free(cluster);
             return ret;
         }
 
         if (write(fd, cluster, rest_size) < 0) {
-            qemu_close(fd);
+            qemu_close(fd, mapping->path);
             g_free(cluster);
             return -2;
         }
@@ -2268,11 +2268,11 @@ static int commit_one_file(BDRVVVFATState* s,
 
     if (ftruncate(fd, size)) {
         perror("ftruncate()");
-        qemu_close(fd);
+        qemu_close(fd, mapping->path);
         g_free(cluster);
         return -4;
     }
-    qemu_close(fd);
+    qemu_close(fd, mapping->path);
     g_free(cluster);
 
     return commit_mappings(s, first_cluster, dir_index);
diff --git a/cutils.c b/cutils.c
index e2bc1b8..4e6bfdc 100644
--- a/cutils.c
+++ b/cutils.c
@@ -375,3 +375,8 @@ int qemu_parse_fd(const char *param)
     }
     return fd;
 }
+
+int qemu_parse_fdset(const char *param)
+{
+    return qemu_parse_fd(param);
+}
diff --git a/monitor.c b/monitor.c
index 30b085f..fddd2b5 100644
--- a/monitor.c
+++ b/monitor.c
@@ -2551,6 +2551,91 @@ static void monitor_fdsets_set_in_use(Monitor *mon, bool in_use)
     }
 }
 
+void monitor_fdset_increment_refcount(Monitor *mon, int64_t fdset_id)
+{
+    mon_fdset_t *mon_fdset;
+
+    if (!mon) {
+        return;
+    }
+
+    QLIST_FOREACH(mon_fdset, &mon->fdsets, next) {
+        if (mon_fdset->id == fdset_id) {
+            mon_fdset->refcount++;
+            break;
+        }
+    }
+}
+
+void monitor_fdset_decrement_refcount(Monitor *mon, int64_t fdset_id)
+{
+    mon_fdset_t *mon_fdset;
+
+    if (!mon) {
+        return;
+    }
+
+    QLIST_FOREACH(mon_fdset, &mon->fdsets, next) {
+        if (mon_fdset->id == fdset_id) {
+            mon_fdset->refcount--;
+            if (mon_fdset->refcount == 0) {
+                monitor_fdset_cleanup(mon_fdset);
+            }
+            break;
+        }
+    }
+}
+
+int monitor_fdset_get_fd(Monitor *mon, int64_t fdset_id, int flags)
+{
+    mon_fdset_t *mon_fdset;
+    mon_fdset_fd_t *mon_fdset_fd;
+    int mon_fd_flags;
+
+    if (!mon) {
+        errno = ENOENT;
+        return -1;
+    }
+
+    QLIST_FOREACH(mon_fdset, &mon->fdsets, next) {
+        if (mon_fdset->id != fdset_id) {
+            continue;
+        }
+        QLIST_FOREACH(mon_fdset_fd, &mon_fdset->fds, next) {
+            if (mon_fdset_fd->removed) {
+                continue;
+            }
+
+            mon_fd_flags = fcntl(mon_fdset_fd->fd, F_GETFL);
+            if (mon_fd_flags == -1) {
+                return -1;
+            }
+
+            switch (flags & O_ACCMODE) {
+            case O_RDWR:
+                if ((mon_fd_flags & O_ACCMODE) == O_RDWR) {
+                    return mon_fdset_fd->fd;
+                }
+                break;
+            case O_RDONLY:
+                if ((mon_fd_flags & O_ACCMODE) == O_RDONLY) {
+                    return mon_fdset_fd->fd;
+                }
+                break;
+            case O_WRONLY:
+                if ((mon_fd_flags & O_ACCMODE) == O_WRONLY) {
+                    return mon_fdset_fd->fd;
+                }
+                break;
+            }
+        }
+        errno = EACCES;
+        return -1;
+    }
+    errno = ENOENT;
+    return -1;
+}
+
 /* mon_cmds and info_cmds would be sorted at runtime */
 static mon_cmd_t mon_cmds[] = {
 #include "hmp-commands.h"
diff --git a/monitor.h b/monitor.h
index 5f4de1b..1efed38 100644
--- a/monitor.h
+++ b/monitor.h
@@ -86,4 +86,8 @@ int qmp_qom_set(Monitor *mon, const QDict *qdict, QObject **ret);
 
 int qmp_qom_get(Monitor *mon, const QDict *qdict, QObject **ret);
 
+int monitor_fdset_get_fd(Monitor *mon, int64_t fdset_id, int flags);
+void monitor_fdset_increment_refcount(Monitor *mon, int64_t fdset_id);
+void monitor_fdset_decrement_refcount(Monitor *mon, int64_t fdset_id);
+
 #endif /* !MONITOR_H */
diff --git a/osdep.c b/osdep.c
index 7b09dff..0303cdd 100644
--- a/osdep.c
+++ b/osdep.c
@@ -47,6 +47,7 @@ extern int madvise(caddr_t, size_t, int);
 #include "qemu-common.h"
 #include "trace.h"
 #include "qemu_socket.h"
+#include "monitor.h"
 
 static const char *qemu_version = QEMU_VERSION;
 
@@ -75,6 +76,79 @@ int qemu_madvise(void *addr, size_t len, int advice)
 #endif
 }
 
+/*
+ * Dups an fd and sets the flags
+ */
+static int qemu_dup(int fd, int flags)
+{
+    int i;
+    int ret;
+    int serrno;
+    int dup_flags;
+    int setfl_flags[] = { O_APPEND, O_ASYNC, O_DIRECT, O_NOATIME,
+                          O_NONBLOCK, 0 };
+
+    if (flags & O_CLOEXEC) {
+        ret = fcntl(fd, F_DUPFD_CLOEXEC, 0);
+        if (ret == -1 && errno == EINVAL) {
+            ret = dup(fd);
+            if (ret != -1 && fcntl_setfl(ret, O_CLOEXEC) == -1) {
+                goto fail;
+            }
+        }
+    } else {
+        ret = dup(fd);
+    }
+
+    if (ret == -1) {
+        goto fail;
+    }
+
+    dup_flags = fcntl(ret, F_GETFL);
+    if (dup_flags == -1) {
+        goto fail;
+    }
+
+    if ((flags & O_SYNC) != (dup_flags & O_SYNC)) {
+        errno = EINVAL;
+        goto fail;
+    }
+
+    /* Set/unset flags that we can with fcntl */
+    i = 0;
+    while (setfl_flags[i] != 0) {
+        if (flags & setfl_flags[i]) {
+            dup_flags = (dup_flags | setfl_flags[i]);
+        } else {
+            dup_flags = (dup_flags & ~setfl_flags[i]);
+        }
+        i++;
+    }
+
+    if (fcntl(ret, F_SETFL, dup_flags) == -1) {
+        goto fail;
+    }
+
+    /* Truncate the file in the cases that open() would truncate it */
+    if (flags & O_TRUNC ||
+            ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))) {
+        if (ftruncate(ret, 0) == -1) {
+            goto fail;
+        }
+    }
+
+    qemu_set_cloexec(ret);
+
+    return ret;
+
+fail:
+    serrno = errno;
+    if (ret != -1) {
+        close(ret);
+    }
+    errno = serrno;
+    return -1;
+}
 
 /*
  * Opens a file with FD_CLOEXEC set
@@ -84,6 +158,36 @@ int qemu_open(const char *name, int flags, ...)
     int ret;
     int mode = 0;
 
+#ifndef _WIN32
+    const char *fdset_id_str;
+
+    /* Attempt dup of fd from fd set */
+    if (strstart(name, "/dev/fdset/", &fdset_id_str)) {
+        int64_t fdset_id;
+        int fd, dupfd;
+
+        fdset_id = qemu_parse_fdset(fdset_id_str);
+        if (fdset_id == -1) {
+            errno = EINVAL;
+            return -1;
+        }
+
+        fd = monitor_fdset_get_fd(default_mon, fdset_id, flags);
+        if (fd == -1) {
+            return -1;
+        }
+
+        dupfd = qemu_dup(fd, flags);
+        if (fd == -1) {
+            return -1;
+        }
+
+        monitor_fdset_increment_refcount(default_mon, fdset_id);
+
+        return dupfd;
+    }
+#endif
+
     if (flags & O_CREAT) {
         va_list ap;
 
@@ -104,8 +208,40 @@ int qemu_open(const char *name, int flags, ...)
     return ret;
 }
 
-int qemu_close(int fd)
+int qemu_close(int fd, const char *name)
 {
+    const char *fdset_id_str;
+
+    /* Close fd that was dup'd from an fdset */
+    if (strstart(name, "/dev/fdset/", &fdset_id_str)) {
+        int ret;
+        int64_t fdset_id;
+        int fdset_fd;
+        int flags;
+
+        fdset_id = qemu_parse_fdset(fdset_id_str);
+        if (fdset_id == -1) {
+            return -1;
+        }
+
+        flags = fcntl(fd, F_GETFL);
+        if (flags == -1) {
+            return -1;
+        }
+
+        fdset_fd = monitor_fdset_get_fd(default_mon, fdset_id, flags);
+        if (fdset_fd == -1) {
+            return -1;
+        }
+
+        ret = close(fd);
+        if (ret == 0) {
+            monitor_fdset_decrement_refcount(default_mon, fdset_id);
+        }
+
+        return ret;
+    }
+
     return close(fd);
 }
 
diff --git a/qemu-common.h b/qemu-common.h
index c8ddf2f..6b4123c 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -147,6 +147,7 @@ int qemu_fls(int i);
 int qemu_fdatasync(int fd);
 int fcntl_setfl(int fd, int flag);
 int qemu_parse_fd(const char *param);
+int qemu_parse_fdset(const char *param);
 
 /*
  * strtosz() suffixes used to specify the default treatment of an
@@ -188,7 +189,7 @@ const char *path(const char *pathname);
 void *qemu_oom_check(void *ptr);
 
 int qemu_open(const char *name, int flags, ...);
-int qemu_close(int fd);
+int qemu_close(int fd, const char *name);
 ssize_t qemu_write_full(int fd, const void *buf, size_t count)
     QEMU_WARN_UNUSED_RESULT;
 ssize_t qemu_send_full(int fd, const void *buf, size_t count, int flags)
diff --git a/qemu-tool.c b/qemu-tool.c
index 318c5fc..f4db9db 100644
--- a/qemu-tool.c
+++ b/qemu-tool.c
@@ -31,6 +31,7 @@ struct QEMUBH
 };
 
 Monitor *cur_mon;
+Monitor *default_mon;
 
 int monitor_cur_is_qmp(void)
 {
@@ -57,6 +58,17 @@ void monitor_protocol_event(MonitorEvent event, QObject *data)
 {
 }
 
+int monitor_fdset_get_fd(Monitor *mon, int64_t fdset_id, int flags)
+{
+    return -1;
+}
+void monitor_fdset_increment_refcount(Monitor *mon, int64_t fdset_id)
+{
+}
+void monitor_fdset_decrement_refcount(Monitor *mon, int64_t fdset_id)
+{
+}
+
 int64_t cpu_get_clock(void)
 {
     return qemu_get_clock_ns(rt_clock);
-- 
1.7.10.4




More information about the libvir-list mailing list