[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

[Linux-cluster] Re: GFS



Hi Mark,

On Thu, 11 Aug 2005, Pekka J Enberg wrote:
> Reading and writing from other filesystems to a GFS2 mmap'd file
> does not walk the vmas. Therefore, data consistency guarantees
> are different:

What I meant was that if a filesystem requires vma walks, we need to do
it at the VFS level with something like the following patch. With this, your
filesystem would implement a_ops->iolock_acquire that sorts the locks
and takes them all. In case of GFS2, this would replace walk_vm().

Thoughts?

			Pekka

[PATCH] vfs: iolock

This patch introduces iolock which can be used by filesystems that require
special locking when accessing an mmap'd region.

Unfinished and untested.

Signed-off-by: Pekka Enberg <penberg cs helsinki fi>
---

 fs/Makefile            |    2 -
 fs/iolock.c            |   88 +++++++++++++++++++++++++++++++++++++++++++++++++
 fs/read_write.c        |   15 ++++++++
 include/linux/fs.h     |    2 +
 include/linux/iolock.h |   11 ++++++
 5 files changed, 117 insertions(+), 1 deletion(-)

Index: 2.6-mm/fs/iolock.c
===================================================================
--- /dev/null
+++ 2.6-mm/fs/iolock.c
@@ -0,0 +1,88 @@
+/*
+ * fs/iolock.c
+ *
+ * Derived from GFS2.
+ */
+
+#include <linux/iolock.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+/*
+ * I/O lock contains all files that participate in locking a memory region.
+ * It is used for filesystems that require special locks to access mmap'd
+ * memory.
+ */
+struct iolock {
+	struct address_space	*mapping;	/* supplies the iolock a_ops */
+	unsigned long		nr_files;	/* entries used in files[] */
+	struct file		**files;	/* vm_file of each participating vma */
+};
+
+/*
+ * Collect the files mapped over the user buffer [buf, buf + size) whose
+ * mapping implements both iolock callbacks, then take their locks.
+ * Returns an iolock (possibly with no files) to pass to iolock_release(),
+ * or an ERR_PTR() when allocation or ->iolock_acquire() fails.
+ */
+struct iolock *iolock_region(const char __user *buf, size_t size)
+{
+	int err = -ENOMEM;
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	unsigned long start = (unsigned long)buf;
+	unsigned long end = start + size;
+	struct iolock *ret;
+
+	ret = kcalloc(1, sizeof(*ret), GFP_KERNEL);
+	if (!ret)
+		return ERR_PTR(-ENOMEM);
+
+	down_read(&mm->mmap_sem);
+	ret->files = kcalloc(mm->map_count, sizeof(struct file*), GFP_KERNEL);
+	if (!ret->files) {
+		up_read(&mm->mmap_sem);
+		goto error;
+	}
+	for (vma = find_vma(mm, start); vma; vma = vma->vm_next) {
+		struct file *file = vma->vm_file;
+		struct address_space *mapping;
+
+		if (end <= vma->vm_start)
+			break;
+		if (!file)
+			continue;
+		mapping = file->f_mapping;
+		if (!mapping->a_ops->iolock_acquire ||
+		    !mapping->a_ops->iolock_release)
+			continue;
+		/* FIXME: This only works when one address_space participates
+		          in the iolock. */
+		ret->mapping = mapping;
+		ret->files[ret->nr_files++] = file;
+	}
+	up_read(&mm->mmap_sem);
+
+	/* No vma needs an iolock: return the empty lock, nothing to acquire. */
+	if (!ret->mapping)
+		return ret;
+	err = ret->mapping->a_ops->iolock_acquire(ret->files, ret->nr_files);
+	if (!err)
+		return ret;
+error:
+	/* Nothing is locked here (a failed acquire is assumed to hold none). */
+	kfree(ret->files);
+	kfree(ret);
+	return ERR_PTR(err);
+}
+
+/* Run the mapping's ->iolock_release(), if a mapping was set; free @iolock. */
+void iolock_release(struct iolock *iolock)
+{
+	if (iolock->mapping && iolock->mapping->a_ops->iolock_release)
+		iolock->mapping->a_ops->iolock_release(iolock->files, iolock->nr_files);
+	kfree(iolock->files);
+	kfree(iolock);
+}
Index: 2.6-mm/fs/read_write.c
===================================================================
--- 2.6-mm.orig/fs/read_write.c
+++ 2.6-mm/fs/read_write.c
@@ -14,6 +14,7 @@
 #include <linux/security.h>
 #include <linux/module.h>
 #include <linux/syscalls.h>
+#include <linux/iolock.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -247,14 +248,21 @@ ssize_t vfs_read(struct file *file, char
 	if (!ret) {
 		ret = security_file_permission (file, MAY_READ);
 		if (!ret) {
+			struct iolock * iolock = iolock_region(buf, count);
+			if (IS_ERR(iolock)) {
+				ret = PTR_ERR(iolock);
+				goto out;
+			}
 			if (file->f_op->read)
 				ret = file->f_op->read(file, buf, count, pos);
 			else
 				ret = do_sync_read(file, buf, count, pos);
+			iolock_release(iolock);
 			if (ret > 0) {
 				fsnotify_access(file->f_dentry);
 				current->rchar += ret;
 			}
+  out:
 			current->syscr++;
 		}
 	}
@@ -298,14 +306,21 @@ ssize_t vfs_write(struct file *file, con
 	if (!ret) {
 		ret = security_file_permission (file, MAY_WRITE);
 		if (!ret) {
+			struct iolock * iolock = iolock_region(buf, count);
+			if (IS_ERR(iolock)) {
+				ret = PTR_ERR(iolock);
+				goto out;
+			}
 			if (file->f_op->write)
 				ret = file->f_op->write(file, buf, count, pos);
 			else
 				ret = do_sync_write(file, buf, count, pos);
+			iolock_release(iolock);
 			if (ret > 0) {
 				fsnotify_modify(file->f_dentry);
 				current->wchar += ret;
 			}
+  out:
 			current->syscw++;
 		}
 	}
Index: 2.6-mm/include/linux/iolock.h
===================================================================
--- /dev/null
+++ 2.6-mm/include/linux/iolock.h
@@ -0,0 +1,11 @@
+#ifndef __LINUX_IOLOCK_H
+#define __LINUX_IOLOCK_H
+
+#include <linux/kernel.h>
+
+struct iolock;	/* opaque here; the layout lives in fs/iolock.c */
+/* Lock the files mapped over a user buffer; pair with iolock_release(). */
+struct iolock *iolock_region(const char __user *buf, size_t count);
+void iolock_release(struct iolock *lock);
+
+#endif
Index: 2.6-mm/fs/Makefile
===================================================================
--- 2.6-mm.orig/fs/Makefile
+++ 2.6-mm/fs/Makefile
@@ -10,7 +10,7 @@ obj-y :=	open.o read_write.o file_table.
 		ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \
 		attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \
 		seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \
-		ioprio.o
+		ioprio.o iolock.o
 
 obj-$(CONFIG_INOTIFY)		+= inotify.o
 obj-$(CONFIG_EPOLL)		+= eventpoll.o
Index: 2.6-mm/include/linux/fs.h
===================================================================
--- 2.6-mm.orig/include/linux/fs.h
+++ 2.6-mm/include/linux/fs.h
@@ -334,6 +334,8 @@ struct address_space_operations {
 			loff_t offset, unsigned long nr_segs);
 	struct page* (*get_xip_page)(struct address_space *, sector_t,
 			int);
+	int (*iolock_acquire)(struct file **, unsigned long);
+	void (*iolock_release)(struct file **, unsigned long);
 };
 
 struct backing_dev_info;


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]