[Cluster-devel] [PATCH] GFS2: directly write blocks past i_size
Benjamin Marzinski
bmarzins at redhat.com
Thu Mar 17 14:05:41 UTC 2011
On Thu, Mar 17, 2011 at 09:26:08AM +0000, Steven Whitehouse wrote:
> Hi,
>
> On Thu, 2011-03-17 at 00:08 -0500, Benjamin Marzinski wrote:
> > GFS2 was relying on the writepage code to write out the zeroed data for
> > fallocate. However, with FALLOC_FL_KEEP_SIZE set, this may be past i_size.
> > If it is, it will be ignored. To work around this, gfs2 now calls
> > write_dirty_buffer directly on the buffer_heads when FALLOC_FL_KEEP_SIZE
> > is set, and it's writing past i_size.
> >
> > Signed-off-by: Benjamin Marzinski <bmarzins at redhat.com>
> > ---
> > fs/gfs2/file.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++----------
> > 1 file changed, 52 insertions(+), 10 deletions(-)
> >
> Generally looks good, but a few minor points....
>
> > Index: gfs2-2.6-nmw/fs/gfs2/file.c
> > ===================================================================
> > --- gfs2-2.6-nmw.orig/fs/gfs2/file.c
> > +++ gfs2-2.6-nmw/fs/gfs2/file.c
> > @@ -617,18 +617,55 @@ static ssize_t gfs2_file_aio_write(struc
> > return generic_file_aio_write(iocb, iov, nr_segs, pos);
> > }
> >
> > -static void empty_write_end(struct page *page, unsigned from,
> > - unsigned to)
> > +static int empty_write_end(struct page *page, unsigned from,
> > + unsigned to, int mode)
> > {
> > - struct gfs2_inode *ip = GFS2_I(page->mapping->host);
> > + struct inode *inode = page->mapping->host;
> > + struct gfs2_inode *ip = GFS2_I(inode);
> > + struct buffer_head *bh;
> > + int waiting = 0;
> > + unsigned offset, blksize = 1 << inode->i_blkbits;
> > + loff_t i_size = i_size_read(inode);
> > + pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
> >
> > zero_user(page, from, to-from);
> > mark_page_accessed(page);
> >
> > - if (!gfs2_is_writeback(ip))
> > - gfs2_page_add_databufs(ip, page, from, to);
> > + if (page->index < end_index || !(mode & FALLOC_FL_KEEP_SIZE)) {
> > + if (!gfs2_is_writeback(ip))
> > + gfs2_page_add_databufs(ip, page, from, to);
> >
> > - block_commit_write(page, from, to);
> > + block_commit_write(page, from, to);
> > + return 0;
> > + }
> > +
> > +second_pass:
> > + offset = 0;
> > + bh = page_buffers(page);
> > + while (offset < from) {
> > + offset += blksize;
> > + bh = bh->b_this_page;
> > + }
> > + while (offset < to) {
> > + if (waiting){
> > + wait_on_buffer(bh);
> > + if (!buffer_uptodate(bh))
> > + return -EIO;
> > + }
> > + else {
> The else should start on the line above to make it more obvious.
>
> > + set_buffer_uptodate(bh);
> > + mark_buffer_dirty(bh);
> > + clear_buffer_new(bh);
> > + write_dirty_buffer(bh, WRITE);
> Should this be WRITE_SYNC or WRITE_SYNC_PLUG I wonder?
>
> > + }
> > + offset += blksize;
> > + bh = bh->b_this_page;
> > + }
> > + if (!waiting) {
> > + waiting = 1;
> > + goto second_pass;
> > + }
> I think the code might be a bit cleaner if it was just written as two
> loops, one after the other since most of the loop content seems to be
> different according to weather "waiting" is set or not.
>
> Otherwise I think this is a good solution,
>
> Steve.
>
Sure. This patch kept evolving, and I probably should have given it a
better cleanup once I was satisfied that it worked. I send off a new
version.
-Ben
> > + return 0;
> > }
> >
> > static int needs_empty_write(sector_t block, struct inode *inode)
> > @@ -643,7 +680,8 @@ static int needs_empty_write(sector_t bl
> > return !buffer_mapped(&bh_map);
> > }
> >
> > -static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
> > +static int write_empty_blocks(struct page *page, unsigned from, unsigned to,
> > + int mode)
> > {
> > struct inode *inode = page->mapping->host;
> > unsigned start, end, next, blksize;
> > @@ -668,7 +706,9 @@ static int write_empty_blocks(struct pag
> > gfs2_block_map);
> > if (unlikely(ret))
> > return ret;
> > - empty_write_end(page, start, end);
> > + ret = empty_write_end(page, start, end, mode);
> > + if (unlikely(ret))
> > + return ret;
> > end = 0;
> > }
> > start = next;
> > @@ -682,7 +722,9 @@ static int write_empty_blocks(struct pag
> > ret = __block_write_begin(page, start, end - start, gfs2_block_map);
> > if (unlikely(ret))
> > return ret;
> > - empty_write_end(page, start, end);
> > + ret = empty_write_end(page, start, end, mode);
> > + if (unlikely(ret))
> > + return ret;
> > }
> >
> > return 0;
> > @@ -731,7 +773,7 @@ static int fallocate_chunk(struct inode
> >
> > if (curr == end)
> > to = end_offset;
> > - error = write_empty_blocks(page, from, to);
> > + error = write_empty_blocks(page, from, to, mode);
> > if (!error && offset + to > inode->i_size &&
> > !(mode & FALLOC_FL_KEEP_SIZE)) {
> > i_size_write(inode, offset + to);
> >
>
More information about the Cluster-devel
mailing list