[dm-devel] [PATCH, RFC 2/2] dm: support REQ_FLUSH directly
Christoph Hellwig
hch at lst.de
Tue Aug 3 18:51:48 UTC 2010
Adapt device-mapper to the new world order where even bio-based devices
get simple REQ_FLUSH requests for cache flushes, and need to submit
them downwards to implement barriers.
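To illustrate the pattern the hunks below apply target by target: an
empty REQ_FLUSH bio carries no data and no meaningful sector, so a
bio-based target only needs to decide which underlying device the flush
goes to and report the bio as remapped.  A minimal sketch for a
hypothetical single-device target (example_ctx and its fields are made
up for illustration; the dm entry points and flags are the real ones):

	static int example_map(struct dm_target *ti, struct bio *bio,
			       union map_info *map_context)
	{
		struct example_ctx *ec = ti->private;	/* hypothetical per-target data */

		bio->bi_bdev = ec->dev->bdev;
		if (bio->bi_rw & REQ_FLUSH) {
			/* nothing to remap, just route the flush downwards */
			return DM_MAPIO_REMAPPED;
		}
		/* normal I/O: translate the sector as usual */
		bio->bi_sector = ec->start + (bio->bi_sector - ti->begin);
		return DM_MAPIO_REMAPPED;
	}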
Note that I've removed the unlikely() annotations around the REQ_FLUSH
checks. While these generally aren't as common as normal reads/writes,
they are common enough that statically mispredicting them is a really
bad idea.
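For reference, unlikely(x) just expands to __builtin_expect(!!(x), 0),
so gcc lays the flush branch out as cold code.  A generic before/after
sketch (handle_flush() is a placeholder, not a real function):

	if (unlikely(bio->bi_rw & REQ_FLUSH))	/* old: flush path assumed cold */
		handle_flush(bio);

	if (bio->bi_rw & REQ_FLUSH)		/* new: let the branch predictor decide */
		handle_flush(bio);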
Tested with simple linear LVM volumes only so far.
Index: linux-2.6/drivers/md/dm-crypt.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-crypt.c 2010-08-03 20:26:49.629254174 +0200
+++ linux-2.6/drivers/md/dm-crypt.c 2010-08-03 20:36:59.279003929 +0200
@@ -1249,7 +1249,7 @@ static int crypt_map(struct dm_target *t
struct dm_crypt_io *io;
struct crypt_config *cc;
- if (unlikely(bio_empty_barrier(bio))) {
+ if (bio->bi_rw & REQ_FLUSH) {
cc = ti->private;
bio->bi_bdev = cc->dev->bdev;
return DM_MAPIO_REMAPPED;
Index: linux-2.6/drivers/md/dm-raid1.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-raid1.c 2010-08-03 20:26:49.641003999 +0200
+++ linux-2.6/drivers/md/dm-raid1.c 2010-08-03 20:36:59.280003649 +0200
@@ -629,7 +629,7 @@ static void do_write(struct mirror_set *
struct dm_io_region io[ms->nr_mirrors], *dest = io;
struct mirror *m;
struct dm_io_request io_req = {
- .bi_rw = WRITE | (bio->bi_rw & WRITE_BARRIER),
+ .bi_rw = WRITE | (bio->bi_rw & (WRITE_BARRIER|REQ_FLUSH)),
.mem.type = DM_IO_BVEC,
.mem.ptr.bvec = bio->bi_io_vec + bio->bi_idx,
.notify.fn = write_callback,
@@ -670,7 +670,7 @@ static void do_writes(struct mirror_set
bio_list_init(&requeue);
while ((bio = bio_list_pop(writes))) {
- if (unlikely(bio_empty_barrier(bio))) {
+ if (bio->bi_rw & REQ_FLUSH) {
bio_list_add(&sync, bio);
continue;
}
@@ -1199,12 +1199,14 @@ static int mirror_end_io(struct dm_targe
struct dm_bio_details *bd = NULL;
struct dm_raid1_read_record *read_record = map_context->ptr;
+ if (bio->bi_rw & REQ_FLUSH)
+ return error;
+
/*
* We need to dec pending if this was a write.
*/
if (rw == WRITE) {
- if (likely(!bio_empty_barrier(bio)))
- dm_rh_dec(ms->rh, map_context->ll);
+ dm_rh_dec(ms->rh, map_context->ll);
return error;
}
Index: linux-2.6/drivers/md/dm-region-hash.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-region-hash.c 2010-08-03 20:26:49.650023346 +0200
+++ linux-2.6/drivers/md/dm-region-hash.c 2010-08-03 20:36:59.285025649 +0200
@@ -399,7 +399,7 @@ void dm_rh_mark_nosync(struct dm_region_
region_t region = dm_rh_bio_to_region(rh, bio);
int recovering = 0;
- if (bio_empty_barrier(bio)) {
+ if (bio->bi_rw & REQ_FLUSH) {
rh->barrier_failure = 1;
return;
}
@@ -524,7 +524,7 @@ void dm_rh_inc_pending(struct dm_region_
struct bio *bio;
for (bio = bios->head; bio; bio = bio->bi_next) {
- if (bio_empty_barrier(bio))
+ if (bio->bi_rw & REQ_FLUSH)
continue;
rh_inc(rh, dm_rh_bio_to_region(rh, bio));
}
Index: linux-2.6/drivers/md/dm-snap.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-snap.c 2010-08-03 20:26:49.656003091 +0200
+++ linux-2.6/drivers/md/dm-snap.c 2010-08-03 20:36:59.290023135 +0200
@@ -1581,7 +1581,7 @@ static int snapshot_map(struct dm_target
chunk_t chunk;
struct dm_snap_pending_exception *pe = NULL;
- if (unlikely(bio_empty_barrier(bio))) {
+ if (bio->bi_rw & REQ_FLUSH) {
bio->bi_bdev = s->cow->bdev;
return DM_MAPIO_REMAPPED;
}
@@ -1685,7 +1685,7 @@ static int snapshot_merge_map(struct dm_
int r = DM_MAPIO_REMAPPED;
chunk_t chunk;
- if (unlikely(bio_empty_barrier(bio))) {
+ if (bio->bi_rw & REQ_FLUSH) {
if (!map_context->flush_request)
bio->bi_bdev = s->origin->bdev;
else
@@ -2123,7 +2123,7 @@ static int origin_map(struct dm_target *
struct dm_dev *dev = ti->private;
bio->bi_bdev = dev->bdev;
- if (unlikely(bio_empty_barrier(bio)))
+ if (bio->bi_rw & REQ_FLUSH)
return DM_MAPIO_REMAPPED;
/* Only tell snapshots if this is a write */
Index: linux-2.6/drivers/md/dm-stripe.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-stripe.c 2010-08-03 20:26:49.663003301 +0200
+++ linux-2.6/drivers/md/dm-stripe.c 2010-08-03 20:36:59.295005744 +0200
@@ -214,7 +214,7 @@ static int stripe_map(struct dm_target *
sector_t offset, chunk;
uint32_t stripe;
- if (unlikely(bio_empty_barrier(bio))) {
+ if (bio->bi_rw & REQ_FLUSH) {
BUG_ON(map_context->flush_request >= sc->stripes);
bio->bi_bdev = sc->stripe[map_context->flush_request].dev->bdev;
return DM_MAPIO_REMAPPED;
Index: linux-2.6/drivers/md/dm.c
===================================================================
--- linux-2.6.orig/drivers/md/dm.c 2010-08-03 20:26:49.676004139 +0200
+++ linux-2.6/drivers/md/dm.c 2010-08-03 20:36:59.301005325 +0200
@@ -633,7 +633,7 @@ static void dec_pending(struct dm_io *io
io_error = io->error;
bio = io->bio;
- if (bio->bi_rw & REQ_HARDBARRIER) {
+ if (bio == &md->barrier_bio) {
/*
* There can be just one barrier request so we use
* a per-device variable for error reporting.
@@ -851,7 +851,7 @@ void dm_requeue_unmapped_request(struct
struct request_queue *q = rq->q;
unsigned long flags;
- if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) {
+ if (clone->cmd_flags & REQ_HARDBARRIER) {
/*
* Barrier clones share an original request.
* Leave it to dm_end_request(), which handles this special
@@ -950,7 +950,7 @@ static void dm_complete_request(struct r
struct dm_rq_target_io *tio = clone->end_io_data;
struct request *rq = tio->orig;
- if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) {
+ if (clone->cmd_flags & REQ_HARDBARRIER) {
/*
* Barrier clones share an original request. So can't use
* softirq_done with the original.
@@ -979,7 +979,7 @@ void dm_kill_unmapped_request(struct req
struct dm_rq_target_io *tio = clone->end_io_data;
struct request *rq = tio->orig;
- if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) {
+ if (clone->cmd_flags & REQ_HARDBARRIER) {
/*
* Barrier clones share an original request.
* Leave it to dm_end_request(), which handles this special
@@ -1208,7 +1208,7 @@ static int __clone_and_map(struct clone_
sector_t len = 0, max;
struct dm_target_io *tio;
- if (unlikely(bio_empty_barrier(bio)))
+ if (bio->bi_rw & REQ_FLUSH)
return __clone_and_map_empty_barrier(ci);
ti = dm_table_find_target(ci->map, ci->sector);
@@ -1308,7 +1308,7 @@ static void __split_and_process_bio(stru
ci.map = dm_get_live_table(md);
if (unlikely(!ci.map)) {
- if (!(bio->bi_rw & REQ_HARDBARRIER))
+ if (bio != &md->barrier_bio)
bio_io_error(bio);
else
if (!md->barrier_error)
@@ -1326,7 +1326,7 @@ static void __split_and_process_bio(stru
spin_lock_init(&ci.io->endio_lock);
ci.sector = bio->bi_sector;
ci.sector_count = bio_sectors(bio);
- if (unlikely(bio_empty_barrier(bio)))
+ if (bio->bi_rw & REQ_FLUSH)
ci.sector_count = 1;
ci.idx = bio->bi_idx;
@@ -1421,7 +1421,7 @@ static int _dm_request(struct request_qu
* we have to queue this io for later.
*/
if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) ||
- unlikely(bio->bi_rw & REQ_HARDBARRIER)) {
+ unlikely(bio->bi_rw & (REQ_HARDBARRIER|REQ_FLUSH))) {
up_read(&md->io_lock);
if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) &&
@@ -1462,14 +1462,6 @@ static int dm_request(struct request_que
return _dm_request(q, bio);
}
-static bool dm_rq_is_flush_request(struct request *rq)
-{
- if (rq->cmd_flags & REQ_FLUSH)
- return true;
- else
- return false;
-}
-
void dm_dispatch_request(struct request *rq)
{
int r;
@@ -1517,10 +1509,10 @@ static int setup_clone(struct request *c
{
int r;
- if (dm_rq_is_flush_request(rq)) {
+ if (rq->cmd_flags & REQ_FLUSH) {
blk_rq_init(NULL, clone);
clone->cmd_type = REQ_TYPE_FS;
- clone->cmd_flags |= (REQ_HARDBARRIER | WRITE);
+ clone->cmd_flags |= (WRITE_SYNC | REQ_FLUSH);
} else {
r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC,
dm_rq_bio_constructor, tio);
@@ -1573,7 +1565,7 @@ static int dm_prep_fn(struct request_que
struct mapped_device *md = q->queuedata;
struct request *clone;
- if (unlikely(dm_rq_is_flush_request(rq)))
+ if (rq->cmd_flags & REQ_FLUSH)
return BLKPREP_OK;
if (unlikely(rq->special)) {
@@ -1664,7 +1656,7 @@ static void dm_request_fn(struct request
if (!rq)
goto plug_and_out;
- if (unlikely(dm_rq_is_flush_request(rq))) {
+ if (rq->cmd_flags & REQ_FLUSH) {
BUG_ON(md->flush_request);
md->flush_request = rq;
blk_start_request(rq);
@@ -2239,7 +2231,7 @@ static void dm_flush(struct mapped_devic
bio_init(&md->barrier_bio);
md->barrier_bio.bi_bdev = md->bdev;
- md->barrier_bio.bi_rw = WRITE_BARRIER;
+ md->barrier_bio.bi_rw = WRITE_SYNC | REQ_FLUSH;
__split_and_process_bio(md, &md->barrier_bio);
dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
@@ -2250,19 +2242,8 @@ static void process_barrier(struct mappe
md->barrier_error = 0;
dm_flush(md);
-
- if (!bio_empty_barrier(bio)) {
- __split_and_process_bio(md, bio);
- dm_flush(md);
- }
-
- if (md->barrier_error != DM_ENDIO_REQUEUE)
- bio_endio(bio, md->barrier_error);
- else {
- spin_lock_irq(&md->deferred_lock);
- bio_list_add_head(&md->deferred, bio);
- spin_unlock_irq(&md->deferred_lock);
- }
+ __split_and_process_bio(md, bio);
+ dm_flush(md);
}
/*
Index: linux-2.6/include/linux/bio.h
===================================================================
--- linux-2.6.orig/include/linux/bio.h 2010-08-03 20:32:11.951274008 +0200
+++ linux-2.6/include/linux/bio.h 2010-08-03 20:36:59.303005325 +0200
@@ -241,10 +241,6 @@ enum rq_flag_bits {
#define bio_offset(bio) bio_iovec((bio))->bv_offset
#define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx)
#define bio_sectors(bio) ((bio)->bi_size >> 9)
-#define bio_empty_barrier(bio) \
- ((bio->bi_rw & REQ_HARDBARRIER) && \
- !bio_has_data(bio) && \
- !(bio->bi_rw & REQ_DISCARD))
static inline unsigned int bio_cur_bytes(struct bio *bio)
{
Index: linux-2.6/drivers/md/dm-io.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-io.c 2010-08-03 20:26:49.685023485 +0200
+++ linux-2.6/drivers/md/dm-io.c 2010-08-03 20:36:59.308004417 +0200
@@ -364,7 +364,7 @@ static void dispatch_io(int rw, unsigned
*/
for (i = 0; i < num_regions; i++) {
*dp = old_pages;
- if (where[i].count || (rw & REQ_HARDBARRIER))
+ if (where[i].count || (rw & REQ_FLUSH))
do_region(rw, i, where + i, dp, io);
}
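For anyone who wants to poke the flush path from userspace, something
like the sketch below should do; the device node is hypothetical, and
the point is that fsync() on a block device ends up in
blkdev_issue_flush(), which enters the stack above as an empty
REQ_FLUSH bio:

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		char buf[512];
		int fd = open("/dev/mapper/test", O_WRONLY);	/* hypothetical node */

		if (fd < 0) {
			perror("open");
			return 1;
		}
		memset(buf, 0xab, sizeof(buf));
		if (write(fd, buf, sizeof(buf)) != (ssize_t)sizeof(buf))
			perror("write");
		if (fsync(fd))		/* forces the cache flush */
			perror("fsync");
		close(fd);
		return 0;
	}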