[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

Re: Ext3 and LFS - possible? fatal?



Nigel Metheringham writes:
> Has anyone tried LFS (ie >2G files support) and Ext3 together?
> Are there good reasons why this should/should not work?

I have already implemented an LFS patch for ext3 and sent it to Stephen.
I will post it here as well, but I have made several other changes to the
ext3 source code, so the patch may not apply cleanly to stock
ext3-0.0.6b.

> I see the RH enterprise kernel patch set specifically does not attempt 
> both lfs and ext3, but the lfs patches themselves touch some reasonably 
> localised parts of ext2, so I would hope (without having dived in there 
> to test), that the ext3 changes would mirror that reasonably well...

I can't speak for the RH kernel, but the ext3+LFS code has been under
testing at TurboLinux and we have not seen any problems.

Cheers, Andreas
====================== ext3-0.0.6b-lfs.diff ==============================
--- linux/fs/ext3/file.c.orig	Fri Jan  5 22:11:00 2001
+++ linux/fs/ext3/file.c	Mon Jan 15 13:21:44 2001
@@ -40,10 +40,6 @@
 static long long ext3_file_lseek(struct file *, long long, int);
 static ssize_t ext3_file_write (struct file *, const char *, size_t, loff_t *);
 static int ext3_release_file (struct inode *, struct file *);
-#if BITS_PER_LONG < 64
-static int ext3_open_file (struct inode *, struct file *);
-
-#else
 
 #define EXT3_MAX_SIZE(bits)							\
 	(((EXT3_NDIR_BLOCKS + (1LL << (bits - 2)) + 				\
@@ -56,8 +52,6 @@
 EXT3_MAX_SIZE(10), EXT3_MAX_SIZE(11), EXT3_MAX_SIZE(12), EXT3_MAX_SIZE(13)
 };
 
-#endif
-
 /*
  * We have mostly NULL's here: the current defaults are ok for
  * the ext3 filesystem.
@@ -70,11 +64,7 @@
 	NULL,			/* poll - default */
 	ext3_ioctl,		/* ioctl */
 	generic_file_mmap,	/* mmap */
-#if BITS_PER_LONG == 64	
 	NULL,			/* no special open is needed */
-#else
-	ext3_open_file,
-#endif
 	NULL,			/* flush */
 	ext3_release_file,	/* release */
 	ext3_sync_file,		/* fsync */
@@ -122,14 +111,9 @@
 		case 1:
 			offset += file->f_pos;
 	}
-#if BITS_PER_LONG < 64
-	if (offset >> 31)
-		return -EINVAL;
-#else
 	if (offset < 0 ||
 	    offset > ext3_max_sizes[EXT3_BLOCK_SIZE_BITS(inode->i_sb)])
 		return -EINVAL;
-#endif
 	if (offset != file->f_pos) {
 		file->f_pos = offset;
 		file->f_reada = 0;
@@ -157,12 +141,13 @@
 				size_t count, loff_t *ppos)
 {
 	struct inode * inode = filp->f_dentry->d_inode;
-	off_t pos;
-	long block;
+	loff_t pos;
+	long block, needed;
 	int offset;
-	int written, written_transaction, c, needed, err;
+	size_t written, written_transaction, c;
 	struct buffer_head * bh;
 	struct super_block * sb;
+	int err;
 	int write_error, new_buffer;
 	unsigned long limit;
 	handle_t *handle;
@@ -173,9 +158,9 @@
 		return 0;
 	/* This makes the bounds-checking arithmetic later on much more
 	 * sane. */
-	if (((signed) count) < 0)
- 		return -EINVAL;
-
+	if (((ssize_t) count) < 0)
+		return -EINVAL;
+	
 	jfs_debug(4, "Write for %u at %ld to inode %p\n",
 		  count, (long) *ppos, inode);
 	write_error = 0;
@@ -203,15 +188,45 @@
 		pos = *ppos;
 		if (pos != *ppos)
 			return -EINVAL;
-#if BITS_PER_LONG >= 64
-		if (pos > ext3_max_sizes[EXT3_BLOCK_SIZE_BITS(sb)])
-			return -EINVAL;
-#endif
 	}
 
-	block = pos >> EXT3_BLOCK_SIZE_BITS(sb);
-	offset = pos & (sb->s_blocksize - 1);
-	c = sb->s_blocksize - offset;
+	/* Check for overflow.. */
+
+	/* L-F-S spec 2.2.1.27: */
+	if (!(filp->f_flags & O_LARGEFILE)) {
+		if (pos >= 0x7fffffffULL) /* pos 2G forbidden */
+			return -EFBIG;
+
+		if (pos + count > 0x7fffffffULL)
+			/* Write only until end of allowed region */
+			count = 0x7fffffffULL - pos;
+	}
+
+	{
+		loff_t max = ext3_max_sizes[EXT3_BLOCK_SIZE_BITS(sb)];
+
+		if (pos >= max)
+			return -EFBIG;
+
+		if (pos + count > max) {
+			count = max - pos;
+			if (!count)
+				return -EFBIG;
+		}
+	}
+
+	/* From SUS: We must generate a SIGXFSZ for file size overflow
+	 * only if no bytes were actually written to the file. --sct */
+
+	limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
+	if (limit != RLIM_INFINITY) {
+		if (pos >= limit) {
+			send_sig(SIGXFSZ, current, 0);
+			return -EFBIG;
+		}
+		if (pos+count > limit)
+			count = limit - pos;
+	}
 
 	/* will_journal_data must not just check the journaling mode:
 	 * specific files can be marked for data-journaling even with
@@ -235,76 +250,24 @@
 		needed = (count >> EXT3_BLOCK_SIZE_BITS(sb)) + 1;
 		if (needed > EXT3_MAX_TRANS_DATA)
 			needed = EXT3_MAX_TRANS_DATA;
-	} else 
+	} else
 		needed = 0;
-	
-	handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS + needed);
-		
-	
-	/* Check for overflow.. */
-
-#if BITS_PER_LONG < 64
-	/* If the fd's pos is already greater than or equal to the file
-	 * descriptor's offset maximum, then we need to return EFBIG for
-	 * any non-zero count (and we already tested for zero above). */
-	if (((unsigned) pos) >= 0x7FFFFFFFUL)  {
-		written = -EFBIG;
-		goto error_out;
-	}
-	
-	/* If we are about to overflow the maximum file size, we also
-	 * need to return the error, but only if no bytes can be written
-	 * successfully. */
-	if (((unsigned) pos + count) > 0x7FFFFFFFUL) {
-		count = 0x7FFFFFFFL - pos;
-		if (((signed) count) < 0)  {
-			written = -EFBIG;
-			goto error_out;
-		}
-	}
-#else
-	{
-		off_t max = ext3_max_sizes[EXT3_BLOCK_SIZE_BITS(sb)];
 
-		if (pos >= max) {
-			written = -EFBIG;
-			goto error_out;
-		}
-		
-		if (pos + count > max) {
-			count = max - pos;
-			if (!count) {
-				written = -EFBIG;
-				goto error_out;
-			}
-		}
-		if (((pos + count) >> 31) && 
-		    !EXT3_HAS_RO_COMPAT_FEATURE(sb,
-					EXT3_FEATURE_RO_COMPAT_LARGE_FILE)) {
-			/* If this is the first large file created, add a flag
-			   to the superblock */
-			journal_get_write_access(handle, sb->u.ext3_sb.s_sbh);
-			ext3_update_fs_rev(sb);
-			EXT3_SET_RO_COMPAT_FEATURE(sb,
-					EXT3_FEATURE_RO_COMPAT_LARGE_FILE);
-			journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh);
-		}
-	}
-#endif
+	block = pos >> EXT3_BLOCK_SIZE_BITS(sb);
+	offset = pos & (sb->s_blocksize - 1);
+	c = sb->s_blocksize - offset;
 
-	/* From SUS: We must generate a SIGXFSZ for file size overflow
-	 * only if no bytes were actually written to the file. --sct */
+	handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS + needed);
 
-	limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
-	if (limit < RLIM_INFINITY) {
-		if (((unsigned) pos+count) >= limit) {
-			count = limit - pos;
-			if (((signed) count) <= 0) {
-				send_sig(SIGXFSZ, current, 0);
-				written = -EFBIG;
-				goto error_out;
-			}
-		}
+	if (((pos + count) >> 31) &&
+	    (!EXT3_HAS_RO_COMPAT_FEATURE(sb,EXT3_FEATURE_RO_COMPAT_LARGE_FILE)||
+	     EXT3_SB(sb)->s_es->s_rev_level == cpu_to_le32(EXT3_GOOD_OLD_REV))){
+		/* If this is the first large file, add a flag to superblock */
+		journal_get_write_access(handle, sb->u.ext3_sb.s_sbh);
+		ext3_update_fs_rev(sb);
+		EXT3_SET_RO_COMPAT_FEATURE(sb,
+					   EXT3_FEATURE_RO_COMPAT_LARGE_FILE);
+		journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh);
 	}
 
 	written = written_transaction = 0;
@@ -441,7 +399,6 @@
 	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
 	*ppos = pos;
 	ext3_mark_inode_dirty(handle, inode);
-error_out:
 	ext3_journal_stop(handle, inode);
 	return written;
 }
@@ -458,15 +415,3 @@
 	return 0;
 }
 
-#if BITS_PER_LONG < 64
-/*
- * Called when an inode is about to be open.
- * We use this to disallow opening RW large files on 32bit systems.
- */
-static int ext3_open_file (struct inode * inode, struct file * filp)
-{
-	if (inode->u.ext3_i.i_high_size && (filp->f_mode & FMODE_WRITE))
-		return -EFBIG;
-	return 0;
-}
-#endif
--- linux/fs/ext3/inode.c.orig	Fri Jan  5 22:11:00 2001
+++ linux/fs/ext3/inode.c	Mon Jan 15 16:25:05 2001
@@ -626,15 +626,8 @@
 		inode->u.ext3_i.i_dir_acl = le32_to_cpu(iloc.raw_inode->i_dir_acl);
 	else {
 		inode->u.ext3_i.i_dir_acl = 0;
-		inode->u.ext3_i.i_high_size =
-			le32_to_cpu(iloc.raw_inode->i_size_high);
-#if BITS_PER_LONG < 64
-		if (iloc.raw_inode->i_size_high)
-			inode->i_size = (__u32)-1;
-#else
-		inode->i_size |= ((__u64)le32_to_cpu(iloc.raw_inode->i_size_high))
-			<< 32;
-#endif
+		inode->i_size = ((__u64)(inode->i_size & 0xFFFFFFFFUL)) |
+			(((__u64)le32_to_cpu(iloc.raw_inode->i_size_high))<<32);
 	}
 	inode->u.ext3_i.i_disksize = inode->i_size;
 	inode->u.ext3_i.i_block_group = iloc.block_group;
@@ -725,14 +718,8 @@
 	raw_inode->i_file_acl = cpu_to_le32(inode->u.ext3_i.i_file_acl);
 	if (S_ISDIR(inode->i_mode))
 		raw_inode->i_dir_acl = cpu_to_le32(inode->u.ext3_i.i_dir_acl);
-	else { 
-#if BITS_PER_LONG < 64
-		raw_inode->i_size_high =
-			cpu_to_le32(inode->u.ext3_i.i_high_size);
-#else
+	else
 		raw_inode->i_size_high = cpu_to_le32(inode->u.ext3_i.i_disksize >> 32);
-#endif
-	}
 	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
 		raw_inode->i_block[0] = cpu_to_le32(kdev_t_to_nr(inode->i_rdev));
 	else for (block = 0; block < EXT3_N_BLOCKS; block++)
@@ -843,60 +829,51 @@
 		goto out;
 
 	if (iattr->ia_valid & ATTR_SIZE) {
-		off_t size = iattr->ia_size;
+		loff_t size = iattr->ia_size;
 		unsigned long limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
 
 		if (size < 0)
 			return -EINVAL;
-#if BITS_PER_LONG == 64	
 		if (size > ext3_max_sizes[EXT3_BLOCK_SIZE_BITS(inode->i_sb)])
 			return -EFBIG;
-#endif
- 		if (limit < RLIM_INFINITY && size > limit) {
+		if (limit != RLIM_INFINITY && size > limit) {
 			send_sig(SIGXFSZ, current, 0);
 			return -EFBIG;
 		}
-
-#if BITS_PER_LONG == 64	
-		if (size >> 33) {
-			struct super_block *sb = inode->i_sb;
-			struct ext3_super_block *es = sb->u.ext3_sb.s_es;
-
-			if (!EXT3_HAS_RO_COMPAT_FEATURE(sb,
-					EXT3_FEATURE_RO_COMPAT_LARGE_FILE)) {
-				struct buffer_head *bh = sb->u.ext3_sb.s_sbh;
-				
-				handle = ext3_journal_start(inode, 1);
-				if (IS_ERR(handle))
-					return PTR_ERR(handle);
-
-				/* If this is the first large file
-				 * created, add a flag to the superblock */
-				ext3_update_fs_rev(sb);
-				EXT3_SET_RO_COMPAT_FEATURE(sb,
-					EXT3_FEATURE_RO_COMPAT_LARGE_FILE);
-				journal_dirty_metadata(handle, bh); /*@@@err*/
-				ext3_journal_stop(handle, inode);
-			}
-		}
-#endif
 	}
-	
 
 	retval = inode_change_ok(inode, iattr);
 	if (retval != 0)
 		goto out;
 
 	/* Notify-change transaction.  The maximum number of buffers
-	 * required is one. */
+	 * required is two (inode and maybe superblock if a large file). */
 
-	handle = ext3_journal_start(inode, 1);
+	handle = ext3_journal_start(inode, 2);
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 	retval = ext3_reserve_inode_write(handle, inode, &iloc);
-	if (retval) 			
+	if (retval)
 		goto out_stop;
-	
+
+	if (iattr->ia_valid & ATTR_SIZE && iattr->ia_size >> 31) {
+		struct super_block *sb = inode->i_sb;
+
+		if (!EXT3_HAS_RO_COMPAT_FEATURE(sb,
+					EXT3_FEATURE_RO_COMPAT_LARGE_FILE) ||
+		    EXT3_SB(sb)->s_es->s_rev_level ==
+					cpu_to_le32(EXT3_GOOD_OLD_REV)) {
+			/* If this is the first large file created,
+			 * add a flag to the superblock */
+			/* FIXME do we need journal_write_access() on sb? */
+			ext3_update_fs_rev(sb);
+			EXT3_SET_RO_COMPAT_FEATURE(sb,
+					EXT3_FEATURE_RO_COMPAT_LARGE_FILE);
+			/*@@@err*/
+			journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh);
+		}
+	}
+
 	inode_setattr(inode, iattr);
 
 	if (iattr->ia_valid & ATTR_ATTR_FLAG) {
--- linux/include/linux/ext3_fs_i.h.orig	Fri Jan  5 22:10:59 2001
+++ linux/include/linux/ext3_fs_i.h	Mon Jan 15 13:37:50 2001
@@ -45,7 +45,7 @@
 	 * in memory.  During truncate, i_size is set to 0 by the VFS
 	 * but the filesystem won't set i_disksize to 0 until the
 	 * truncate is actually under way. */
-	off_t	i_disksize;
+	loff_t	i_disksize;
 };
 
 #endif	/* _LINUX_EXT3_FS_I */
-- 
Andreas Dilger  \ "If a man ate a pound of pasta and a pound of antipasto,
                 \  would they cancel out, leaving him still hungry?"
http://www-mddsp.enel.ucalgary.ca/People/adilger/               -- Dogbert





[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]