[dm-devel] Re: [PATCH] locking update for VFS locking patch
Kevin Corry
kevcorry at us.ibm.com
Thu Feb 26 18:22:01 UTC 2004
On Thursday 26 February 2004 3:41 pm, Chris Mason wrote:
> On Thu, 2004-02-26 at 15:43, Kevin Corry wrote:
> > I've been trying to test out the VFS-lock patch, but haven't been having
> > any luck with it. I applied it to a clean 2.6.3 kernel (along with the
> > kgdb patch from -mm), and it consistently BUGs when it tries to mount the
> > root filesystem. I've captured the console output through gdb and
> > attached it below, along with the kernel config. My root filesystem is
> > ext3 if that helps.
> >
> > I'll keep hunting for the problem, but I figured I'd send this
> > information to you to start with.
>
> This one should work better for you.
I've completed some simple tests (just light loads so far) with the new
VFS-lock patch, and things seem to be working correctly. I've included
the patch below, along with the proposed changes to dm.c to actually call
the APIs.
--
Kevin Corry
kevcorry at us.ibm.com
http://evms.sourceforge.net/
VFS-Lock patch.
--- diff/drivers/md/dm.c 2004-02-25 16:19:20.000000000 -0600
+++ source/drivers/md/dm.c 2004-02-26 16:46:04.000000000 -0600
@@ -12,6 +12,7 @@
#include <linux/moduleparam.h>
#include <linux/blkpg.h>
#include <linux/bio.h>
+#include <linux/buffer_head.h>
#include <linux/mempool.h>
#include <linux/slab.h>
@@ -46,6 +47,7 @@
*/
#define DMF_BLOCK_IO 0
#define DMF_SUSPENDED 1
+#define DMF_FS_LOCKED 2
struct mapped_device {
struct rw_semaphore lock;
@@ -826,6 +828,24 @@
return 0;
}
+static void __lock_disk(struct gendisk *disk)
+{
+ struct block_device *bdev = bdget_disk(disk, 0);
+ if (bdev) {
+ fsync_bdev_lockfs(bdev);
+ bdput(bdev);
+ }
+}
+
+static void __unlock_disk(struct gendisk *disk)
+{
+ struct block_device *bdev = bdget_disk(disk, 0);
+ if (bdev) {
+ unlockfs(bdev);
+ bdput(bdev);
+ }
+}
+
/*
* We need to be able to change a mapping table under a mounted
* filesystem. For example we might want to move some data in
@@ -837,12 +857,23 @@
{
DECLARE_WAITQUEUE(wait, current);
- down_write(&md->lock);
+ /* Flush I/O to the device. */
+ down_read(&md->lock);
+ if (test_bit(DMF_BLOCK_IO, &md->flags)) {
+ up_read(&md->lock);
+ return -EINVAL;
+ }
+
+ if (!test_and_set_bit(DMF_FS_LOCKED, &md->flags)) {
+ __lock_disk(md->disk);
+ }
+ up_read(&md->lock);
/*
* First we set the BLOCK_IO flag so no more ios will be
* mapped.
*/
+ down_write(&md->lock);
if (test_bit(DMF_BLOCK_IO, &md->flags)) {
up_write(&md->lock);
return -EINVAL;
@@ -892,11 +923,13 @@
dm_table_resume_targets(md->map);
clear_bit(DMF_SUSPENDED, &md->flags);
clear_bit(DMF_BLOCK_IO, &md->flags);
+ clear_bit(DMF_FS_LOCKED, &md->flags);
def = bio_list_get(&md->deferred);
__flush_deferred_io(md, def);
up_write(&md->lock);
+ __unlock_disk(md->disk);
blk_run_queues();
return 0;
--- diff/fs/block_dev.c 2004-02-26 15:50:53.000000000 -0600
+++ source/fs/block_dev.c 2004-02-26 15:50:41.000000000 -0600
@@ -242,6 +242,7 @@
{
memset(bdev, 0, sizeof(*bdev));
sema_init(&bdev->bd_sem, 1);
+ sema_init(&bdev->bd_mount_sem, 1);
INIT_LIST_HEAD(&bdev->bd_inodes);
INIT_LIST_HEAD(&bdev->bd_list);
inode_init_once(&ei->vfs_inode);
--- diff/fs/buffer.c 2004-02-18 10:39:07.000000000 -0600
+++ source/fs/buffer.c 2004-02-26 10:59:52.000000000 -0600
@@ -259,6 +259,17 @@
return sync_blockdev(bdev);
}
+int fsync_bdev_lockfs(struct block_device *bdev)
+{
+ int res;
+ res = fsync_bdev(bdev);
+ if (res)
+ return res;
+ sync_super_lockfs(bdev);
+ return sync_blockdev(bdev);
+}
+EXPORT_SYMBOL(fsync_bdev_lockfs);
+
/*
* sync everything. Start out by waking pdflush, because that writes back
* all queues in parallel.
--- diff/fs/reiserfs/super.c 2004-02-17 21:57:47.000000000 -0600
+++ source/fs/reiserfs/super.c 2004-02-26 10:59:51.000000000 -0600
@@ -82,7 +82,7 @@
reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB (s));
reiserfs_block_writes(&th) ;
- journal_end(&th, s, 1) ;
+ journal_end_sync(&th, s, 1) ;
}
s->s_dirt = dirty;
reiserfs_write_unlock(s);
--- diff/fs/super.c 2004-02-18 10:39:09.000000000 -0600
+++ source/fs/super.c 2004-02-26 10:59:52.000000000 -0600
@@ -293,6 +293,62 @@
}
/*
+ * triggered by the device mapper code to lock a filesystem and force
+ * it into a consistent state.
+ *
+ * This takes the block device bd_mount_sem to make sure no new mounts
+ * happen on bdev until unlockfs is called. If a super is found on this
+ * block device, we hold a read lock on the s->s_umount sem to make sure
+ * nobody unmounts until the snapshot creation is done
+ */
+void sync_super_lockfs(struct block_device *bdev)
+{
+ struct super_block *sb;
+ down(&bdev->bd_mount_sem);
+ sb = get_super(bdev);
+ if (sb) {
+ lock_super(sb);
+ if (sb->s_dirt && sb->s_op->write_super)
+ sb->s_op->write_super(sb);
+ if (sb->s_op->write_super_lockfs)
+ sb->s_op->write_super_lockfs(sb);
+ unlock_super(sb);
+ }
+ /* unlockfs releases s->s_umount and bd_mount_sem */
+}
+
+void unlockfs(struct block_device *bdev)
+{
+ struct list_head *p;
+ /*
+ * copied from get_super, but we need to
+ * do special things since lockfs left the
+ * s_umount sem held
+ */
+ spin_lock(&sb_lock);
+ list_for_each(p, &super_blocks) {
+ struct super_block *s = sb_entry(p);
+ /*
+ * if there is a super for this block device
+ * in the list, get_super must have found it
+ * during sync_super_lockfs, so our drop_super
+ * will drop the reference created there.
+ */
+ if (s->s_bdev == bdev && s->s_root) {
+ spin_unlock(&sb_lock);
+ if (s->s_op->unlockfs)
+ s->s_op->unlockfs(s);
+ drop_super(s);
+ goto unlock;
+ }
+ }
+ spin_unlock(&sb_lock);
+unlock:
+ up(&bdev->bd_mount_sem);
+}
+EXPORT_SYMBOL(unlockfs);
+
+/*
* Note: check the dirty flag before waiting, so we don't
* hold up the sync while mounting a device. (The newly
* mounted device won't need syncing.)
@@ -613,7 +669,14 @@
if (IS_ERR(bdev))
return (struct super_block *)bdev;
+ /*
+ * once the super is inserted into the list by sget, s_umount
+ * will protect the lockfs code from trying to start a snapshot
+ * while we are mounting
+ */
+ down(&bdev->bd_mount_sem);
s = sget(fs_type, test_bdev_super, set_bdev_super, bdev);
+ up(&bdev->bd_mount_sem);
if (IS_ERR(s))
goto out;
--- diff/include/linux/buffer_head.h 2004-02-17 21:57:12.000000000 -0600
+++ source/include/linux/buffer_head.h 2004-02-26 10:59:52.000000000 -0600
@@ -164,6 +164,8 @@
wait_queue_head_t *bh_waitq_head(struct buffer_head *bh);
void wake_up_buffer(struct buffer_head *bh);
int fsync_bdev(struct block_device *);
+int fsync_bdev_lockfs(struct block_device *);
+void unlockfs(struct block_device *);
int fsync_super(struct super_block *);
int fsync_no_super(struct block_device *);
struct buffer_head *__find_get_block(struct block_device *, sector_t, int);
--- diff/include/linux/fs.h 2004-02-18 10:39:10.000000000 -0600
+++ source/include/linux/fs.h 2004-02-26 10:59:52.000000000 -0600
@@ -346,6 +346,7 @@
struct inode * bd_inode; /* will die */
int bd_openers;
struct semaphore bd_sem; /* open/close mutex */
+ struct semaphore bd_mount_sem; /* mount mutex */
struct list_head bd_inodes;
void * bd_holder;
int bd_holders;
@@ -1221,6 +1222,7 @@
extern int filemap_fdatawait(struct address_space *);
extern int filemap_write_and_wait(struct address_space *mapping);
extern void sync_supers(void);
+extern void sync_super_lockfs(struct block_device *);
extern void sync_filesystems(int wait);
extern void emergency_sync(void);
extern void emergency_remount(void);
More information about the dm-devel
mailing list