[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

Re: tytso's readdir speedup patch - adoptable?



I ported my readdir speedup patch to ext3 while I was flying back to
Boston yesterday.  As I feared, I essentially ended up having to
rewrite the ext3_find_entry() from scratch, since the original version
was too fragile to modify safely.  But it wasn't all bad.  This new
version is substantially easier to understand, fixes a bug and a race
condition were in the original version, is ~110 bytes smaller (not all
things get bigger with successive versions :-), and has the readdir
speedup enhancement.

Porting this patch to 2.2 ext2 code base should be pretty trivial, if
anyone cares to do it.

Because this is a complete rewrite of ext3_find_entry(), I don't
recommend that folks try using it in production systems at this time;
wait until other people have looked it over and it gets folded into
the official ext3 releases.  But I would appreciate people willing to
try out the patch and letting me know how well it works for them.
I've desk checked it carefully, and believe it to be Bugfree(tm), but
some additional eyes to check it would be good....

(If you do want to audit the changes, I advise that you apply the
patch to a scratch tree and then look at the search_dirblock and
ext3_find_entry functions; so much as changed that simply examining
the diff will likely not be the most efficient way to go about things.)

						- Ted

===== fs/ext3/namei.c 1.2 vs edited =====
--- 1.2/fs/ext3/namei.c	Tue Aug 21 07:08:28 2001
+++ edited/fs/ext3/namei.c	Wed Aug 22 10:53:11 2001
@@ -55,6 +55,46 @@
 }
 
 /*
+ * Returns 0 if not found, -1 on failure, and 1 on success
+ */
+static int inline search_dirblock(struct buffer_head * bh,
+				  struct inode *dir,
+				  struct dentry *dentry,
+				  unsigned long offset,
+				  struct ext3_dir_entry_2 ** res_dir)
+{
+	struct ext3_dir_entry_2 * de;
+	char * dlimit;
+	int de_len;
+	const char *name = dentry->d_name.name;
+	int namelen = dentry->d_name.len;
+
+	de = (struct ext3_dir_entry_2 *) bh->b_data;
+	dlimit = bh->b_data + dir->i_sb->s_blocksize;
+	while ((char *) de < dlimit) {
+		/* this code is executed quadratically often */
+		/* do minimal checking `by hand' */
+
+		if ((char *) de + namelen <= dlimit &&
+		    ext3_match (namelen, name, de)) {
+			/* found a match - just to be sure, do a full check */
+			if (!ext3_check_dir_entry("ext3_find_entry",
+						  dir, de, bh, offset))
+				return -1;
+			*res_dir = de;
+			return 1;
+		}
+		/* prevent looping on a bad block */
+		de_len = le16_to_cpu(de->rec_len);
+		if (de_len <= 0)
+			return -1;
+		offset += de_len;
+		de = (struct ext3_dir_entry_2 *) ((char *) de + de_len);
+	}
+	return 0;
+}
+
+/*
  *	ext3_find_entry()
  *
  * finds an entry in the specified directory with the wanted name. It
@@ -70,105 +110,79 @@
 {
 	struct super_block * sb;
 	struct buffer_head * bh_use[NAMEI_RA_SIZE];
-	struct buffer_head * bh_read[NAMEI_RA_SIZE];
-	unsigned long offset;
-	int block, toread, i, err;
+	struct buffer_head * bh, *ret = NULL;
+	unsigned long start, block, b;
+	int ra_ptr = 0, ra_max = 0, num = 0;
+	int nblocks, i, err;
 	struct inode *dir = dentry->d_parent->d_inode;
-	const char *name = dentry->d_name.name;
-	int namelen = dentry->d_name.len;
 
 	*res_dir = NULL;
 	sb = dir->i_sb;
 
-	if (namelen > EXT3_NAME_LEN)
-		return NULL;
-
-	memset (bh_use, 0, sizeof (bh_use));
-	toread = 0;
-	for (block = 0; block < NAMEI_RA_SIZE; ++block) {
-		struct buffer_head * bh;
-
-		if ((block << EXT3_BLOCK_SIZE_BITS (sb)) >= dir->i_size)
-			break;
-		bh = ext3_getblk (NULL, dir, block, 0, &err);
-		bh_use[block] = bh;
-		if (bh && !buffer_uptodate(bh))
-			bh_read[toread++] = bh;
-	}
-
-	for (block = 0, offset = 0; offset < dir->i_size; block++) {
-		struct buffer_head * bh;
-		struct ext3_dir_entry_2 * de;
-		char * dlimit;
-
-		if ((block % NAMEI_RA_BLOCKS) == 0 && toread) {
-			ll_rw_block (READ, toread, bh_read);
-			toread = 0;
-		}
-		bh = bh_use[block % NAMEI_RA_SIZE];
-		if (!bh) {
-#if 0
-			ext3_error (sb, "ext3_find_entry",
-				"directory #%lu contains a hole at offset %lu",
-				    dir->i_ino, offset);
-#endif
-			offset += sb->s_blocksize;
-			continue;
+	nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb);
+	start = dir->u.ext3_i.i_dir_start_lookup;
+	if (start >= nblocks)
+		start = 0;
+	block = start;
+restart:
+	do {
+		/*
+		 * We deal with the read-ahead logic here.
+		 */
+		if (ra_ptr >= ra_max) {
+			ra_ptr = 0;
+			b = block;
+			for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) {
+				if (b >= nblocks ||
+				    (num && block == start))
+					break;
+				num++;
+				bh = ext3_getblk(NULL, dir, b++, 0, &err);
+				bh_use[ra_max] = bh;
+				if (bh)
+					ll_rw_block(READ, 1, &bh);
+			}
 		}
-		wait_on_buffer (bh);
+		if ((bh = bh_use[ra_ptr++]) == NULL)
+			goto next;
+		wait_on_buffer(bh);
 		if (!buffer_uptodate(bh)) {
-			/*
-			 * read error: all bets are off
-			 */
-			break;
+			/* read error, skip block & hope for the best */
+			brelse(bh);
+			goto next;
 		}
-
-		de = (struct ext3_dir_entry_2 *) bh->b_data;
-		dlimit = bh->b_data + sb->s_blocksize;
-		while ((char *) de < dlimit) {
-			/* this code is executed quadratically often */
-			/* do minimal checking `by hand' */
-			int de_len;
-
-			if ((char *) de + namelen <= dlimit &&
-			    ext3_match (namelen, name, de)) {
-				/* found a match -
-				   just to be sure, do a full check */
-				if (!ext3_check_dir_entry("ext3_find_entry",
-							  dir, de, bh, offset))
-					goto failure;
-				for (i = 0; i < NAMEI_RA_SIZE; ++i) {
-					if (bh_use[i] != bh)
-						brelse (bh_use[i]);
-				}
-				*res_dir = de;
-				return bh;
-			}
-			/* prevent looping on a bad block */
-			de_len = le16_to_cpu(de->rec_len);
-			if (de_len <= 0)
-				goto failure;
-			offset += de_len;
-			de = (struct ext3_dir_entry_2 *)
-				((char *) de + de_len);
+		i = search_dirblock(bh, dir, dentry,
+			    block << EXT3_BLOCK_SIZE_BITS(sb), res_dir);
+		if (i == 1) {
+			dir->u.ext3_i.i_dir_start_lookup = block;
+			ret = bh;
+			goto cleanup_and_exit;
+		} else {
+			brelse(bh);
+			if (i < 0)
+				goto cleanup_and_exit;
 		}
-
-		brelse (bh);
-		if (((block + NAMEI_RA_SIZE) << EXT3_BLOCK_SIZE_BITS (sb)) >=
-		    dir->i_size)
-			bh = NULL;
-		else
-			bh = ext3_getblk (NULL, dir,
-					block + NAMEI_RA_SIZE, 0, &err);
-		bh_use[block % NAMEI_RA_SIZE] = bh;
-		if (bh && !buffer_uptodate(bh))
-			bh_read[toread++] = bh;
+	next:
+		if (++block >= nblocks)
+			block = 0;
+	} while (block != start);
+
+	/*
+	 * If the directory has grown while we were searching, then
+	 * search the last part of the directory before giving up.
+	 */
+	block = nblocks;
+	nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb);
+	if (block < nblocks) {
+		start = 0;
+		goto restart;
 	}
-
-failure:
-	for (i = 0; i < NAMEI_RA_SIZE; ++i)
-		brelse (bh_use[i]);
-	return NULL;
+		
+cleanup_and_exit:
+	/* Clean up the read-ahead blocks */
+	for (; ra_ptr < ra_max; ra_ptr++)
+		brelse (bh_use[ra_ptr]);
+	return ret;
 }
 
 static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry)
===== include/linux/ext3_fs_i.h 1.1 vs edited =====
--- 1.1/include/linux/ext3_fs_i.h	Tue Aug 21 07:03:44 2001
+++ edited/include/linux/ext3_fs_i.h	Tue Aug 21 12:50:36 2001
@@ -41,6 +41,7 @@
 	__u32	i_prealloc_block;
 	__u32	i_prealloc_count;
 #endif
+	__u32	i_dir_start_lookup;
 	
 	struct list_head i_orphan;	/* unlinked but open inodes */
 





[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]