[linux-lvm] Re: dm, dm_table, request_queue

Miquel van Smoorenburg miquels at cistron.nl
Mon Feb 23 19:20:12 UTC 2004


On Mon, 23 Feb 2004 21:37:31, Joe Thornber wrote:
> On Mon, Feb 23, 2004 at 05:31:08PM +0100, Miquel van Smoorenburg wrote:
> > And, in general, do you feel this is the correct solution ?
> 
> No, I'm going to do what Jens Axboe suggested and add a congestion
> function into the request_queue itself.

Okay, but that means that we need to add a request_queue pointer to
the struct backing_dev_info. My impression was that Jens didn't like
that either.

> dm will therefore provide its
> own congestion function that takes the targets into account.  I think
> this is the opposite of what you're suggesting, devices look downwards
> to their children devices rather than up to their parents.

That was what I suggested, and I also implemented it tonight. It
works, but it is quite complicated - more so than the other way around.
It does keep the simplicity in struct backing_dev_info, that's readable
directly without calling indirect functions.

All in all, I think that the first approach is the simplest and
therefore the best.

So how does this look:

queue_congestion_fn.patch

--- linux-2.6.3.orig/include/linux/backing-dev.h	2004-02-04 04:43:38.000000000 +0100
+++ linux-2.6.3/include/linux/backing-dev.h	2004-02-24 00:56:46.000000000 +0100
@@ -24,6 +24,7 @@
 	unsigned long ra_pages;	/* max readahead in PAGE_CACHE_SIZE units */
 	unsigned long state;	/* Always use atomic bitops on this */
 	int memory_backed;	/* Cannot clean pages with writepage */
+	void *request_queue;	/* Request queue (if this device has one) */
 };
 
 extern struct backing_dev_info default_backing_dev_info;
@@ -32,14 +33,7 @@
 int writeback_in_progress(struct backing_dev_info *bdi);
 void writeback_release(struct backing_dev_info *bdi);
 
-static inline int bdi_read_congested(struct backing_dev_info *bdi)
-{
-	return test_bit(BDI_read_congested, &bdi->state);
-}
-
-static inline int bdi_write_congested(struct backing_dev_info *bdi)
-{
-	return test_bit(BDI_write_congested, &bdi->state);
-}
+extern int bdi_read_congested(struct backing_dev_info *bdi);
+extern int bdi_write_congested(struct backing_dev_info *bdi);
 
 #endif		/* _LINUX_BACKING_DEV_H */
--- linux-2.6.3.orig/include/linux/blkdev.h	2004-02-22 13:52:17.000000000 +0100
+++ linux-2.6.3/include/linux/blkdev.h	2004-02-24 00:59:29.000000000 +0100
@@ -244,6 +244,7 @@
 typedef int (make_request_fn) (request_queue_t *q, struct bio *bio);
 typedef int (prep_rq_fn) (request_queue_t *, struct request *);
 typedef void (unplug_fn) (void *q);
+typedef int (congested_fn) (request_queue_t *, int rw);
 
 struct bio_vec;
 typedef int (merge_bvec_fn) (request_queue_t *, struct bio *, struct bio_vec *);
@@ -290,6 +291,7 @@
 	unplug_fn		*unplug_fn;
 	merge_bvec_fn		*merge_bvec_fn;
 	activity_fn		*activity_fn;
+	congested_fn		*congested_fn;
 
 	/*
 	 * Auto-unplugging state
@@ -514,6 +516,7 @@
 extern void __blk_stop_queue(request_queue_t *q);
 extern void blk_run_queue(request_queue_t *q);
 extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *);
+extern void blk_queue_congested_fn(request_queue_t *, congested_fn *);
 
 static inline request_queue_t *bdev_get_queue(struct block_device *bdev)
 {
--- linux-2.6.3.orig/drivers/block/ll_rw_blk.c	2004-02-04 04:43:10.000000000 +0100
+++ linux-2.6.3/drivers/block/ll_rw_blk.c	2004-02-24 01:14:12.000000000 +0100
@@ -147,6 +147,35 @@
 
 EXPORT_SYMBOL(blk_queue_activity_fn);
 
+void blk_queue_congested_fn(request_queue_t *q, congested_fn *fn)
+{
+	q->congested_fn = fn;
+}
+
+EXPORT_SYMBOL(blk_queue_congested_fn);
+
+int bdi_read_congested(struct backing_dev_info *bdi)
+{
+	request_queue_t *q = bdi->request_queue;
+
+	if (!q || !q->congested_fn)
+		return test_bit(BDI_read_congested, &bdi->state);
+	return q->congested_fn(q, READ);
+}
+
+EXPORT_SYMBOL(bdi_read_congested);
+
+int bdi_write_congested(struct backing_dev_info *bdi)
+{
+	request_queue_t *q = bdi->request_queue;
+
+	if (!q || !q->congested_fn)
+		return test_bit(BDI_write_congested, &bdi->state);
+	return q->congested_fn(q, WRITE);
+}
+
+EXPORT_SYMBOL(bdi_write_congested);
+
 /**
  * blk_queue_prep_rq - set a prepare_request function for queue
  * @q:		queue
@@ -1372,6 +1401,7 @@
 	memset(q, 0, sizeof(*q));
 	init_timer(&q->unplug_timer);
 	atomic_set(&q->refcnt, 1);
+	q->backing_dev_info.request_queue = q;
 	return q;
 }
 
--- linux-2.6.3.orig/drivers/md/dm.h	2004-02-04 04:43:45.000000000 +0100
+++ linux-2.6.3/drivers/md/dm.h	2004-02-24 00:23:58.000000000 +0100
@@ -115,6 +115,7 @@
 int dm_table_get_mode(struct dm_table *t);
 void dm_table_suspend_targets(struct dm_table *t);
 void dm_table_resume_targets(struct dm_table *t);
+int dm_table_any_congested(struct dm_table *t, int rw);
 
 /*-----------------------------------------------------------------
  * A registry of target types.
--- linux-2.6.3.orig/drivers/md/dm-table.c	2004-02-04 04:44:59.000000000 +0100
+++ linux-2.6.3/drivers/md/dm-table.c	2004-02-24 00:45:46.000000000 +0100
@@ -13,6 +13,7 @@
 #include <linux/ctype.h>
 #include <linux/slab.h>
 #include <linux/interrupt.h>
+#include <linux/backing-dev.h>
 #include <asm/atomic.h>
 
 #define MAX_DEPTH 16
@@ -857,6 +858,23 @@
 	}
 }
 
+int dm_table_any_congested(struct dm_table *t, int rw)
+{
+	struct list_head *d, *devices;
+	int r = 0;
+
+	devices = dm_table_get_devices(t);
+	for (d = devices->next; d != devices; d = d->next) {
+		struct dm_dev *dd = list_entry(d, struct dm_dev, list);
+		request_queue_t *q = bdev_get_queue(dd->bdev);
+		if (rw == WRITE)
+			r |= bdi_write_congested(&q->backing_dev_info);
+		else
+			r |= bdi_read_congested(&q->backing_dev_info);
+	}
+
+	return r;
+}
 
 EXPORT_SYMBOL(dm_get_device);
 EXPORT_SYMBOL(dm_put_device);
--- linux-2.6.3.orig/drivers/md/dm.c	2004-02-22 13:52:15.000000000 +0100
+++ linux-2.6.3/drivers/md/dm.c	2004-02-24 01:00:13.000000000 +0100
@@ -526,6 +526,18 @@
 	return 0;
 }
 
+static int dm_any_congested(struct request_queue *q, int rw)
+{
+	int r;
+	struct mapped_device *md = q->queuedata;
+
+	down_read(&md->lock);
+	r = dm_table_any_congested(md->map, rw);
+	up_read(&md->lock);
+
+	return r;
+}
+
 /*-----------------------------------------------------------------
  * A bitset is used to keep track of allocated minor numbers.
  *---------------------------------------------------------------*/
@@ -608,6 +620,7 @@
 	}
 
 	md->queue->queuedata = md;
+	blk_queue_congested_fn(md->queue, dm_any_congested);
 	blk_queue_make_request(md->queue, dm_request);
 
 	md->io_pool = mempool_create(MIN_IOS, mempool_alloc_slab,




More information about the linux-lvm mailing list