[dm-devel] [PATCH] dm-bufio
Joe Thornber
thornber at redhat.com
Mon Oct 17 10:08:10 UTC 2011
On Fri, Oct 14, 2011 at 03:14:34PM -0400, Mikulas Patocka wrote:
> Hi
>
> This is a patch for dm-bufio.
>
> Changes:
> * fixed a bug in i/o submission, introduced by someone who changed the
> code
Could you point me at the specific part of this patch that does this
please?
> * simplified some constructs
> * more likely/unlikely hints
They clutter the code, and have been used without discrimination. What
is the point of using it on a slow path?
> * dm-bufio.h moved from drivers/md to include/linux
Who outside dm do you expect to use this?
> * put cond_resched() to loops (it was there originally and then someone
> deleted it)
If you're going to use cond_resched() at least do so a little more
intelligently than putting it in _every_ loop. For instance you call it on
every iteration of a sweep through the hash table. The call to
cond_resched will take more time than the loop body. At least make a
change so it's only done every n'th iteration.
Can I also point out that I asked you to make a lot of these changes
over a year ago. You've only got yourself to blame if 'someone' does
it for you.
- Someone
>
> Mikulas
>
> ---
> drivers/md/Kconfig | 2
> drivers/md/dm-bufio.c | 95 ++++++++++++++++++++++++----------------
> drivers/md/dm-bufio.h | 110 -----------------------------------------------
> include/linux/dm-bufio.h | 110 +++++++++++++++++++++++++++++++++++++++++++++++
> 4 files changed, 169 insertions(+), 148 deletions(-)
>
> Index: linux-3.1-rc3-fast/drivers/md/Kconfig
> ===================================================================
> --- linux-3.1-rc3-fast.orig/drivers/md/Kconfig 2011-10-14 20:56:45.000000000 +0200
> +++ linux-3.1-rc3-fast/drivers/md/Kconfig 2011-10-14 20:56:46.000000000 +0200
> @@ -209,7 +209,7 @@ config DM_DEBUG
> If unsure, say N.
>
> config DM_BUFIO
> - tristate
> + tristate "Buffered IO"
> depends on BLK_DEV_DM && EXPERIMENTAL
> ---help---
> This interface allows you to do buffered I/O on a device and acts
> Index: linux-3.1-rc3-fast/drivers/md/dm-bufio.c
> ===================================================================
> --- linux-3.1-rc3-fast.orig/drivers/md/dm-bufio.c 2011-10-14 20:56:45.000000000 +0200
> +++ linux-3.1-rc3-fast/drivers/md/dm-bufio.c 2011-10-14 20:57:10.000000000 +0200
> @@ -167,6 +167,12 @@ static void dm_bufio_unlock(struct dm_bu
> mutex_unlock(&c->lock);
> }
>
> +#ifdef CONFIG_PREEMPT
> +#define dm_bufio_cond_resched() do { } while (0)
> +#else
> +#define dm_bufio_cond_resched() cond_resched()
> +#endif
> +
> /*----------------------------------------------------------------*/
>
> /* Default cache size --- available memory divided by the ratio */
> @@ -470,17 +476,18 @@ static void use_inline_bio(struct dm_buf
> ptr = b->data;
> len = b->c->block_size;
>
> - if (len >= PAGE_SIZE)
> + if (likely(len >= PAGE_SIZE))
> BUG_ON((unsigned long)ptr & (PAGE_SIZE - 1));
> else
> BUG_ON((unsigned long)ptr & (len - 1));
>
> do {
> - if (!bio_add_page(&b->bio, virt_to_page(ptr),
> - len < PAGE_SIZE ? len : PAGE_SIZE,
> - virt_to_phys(ptr) & (PAGE_SIZE - 1))) {
> + if (unlikely(!bio_add_page(&b->bio, virt_to_page(ptr),
> + len < PAGE_SIZE ? len : PAGE_SIZE,
> + virt_to_phys(ptr) & (PAGE_SIZE - 1)))) {
> BUG_ON(b->c->block_size <= PAGE_SIZE);
> use_dmio(b, rw, block, end_io);
> + return;
> }
>
> len -= PAGE_SIZE;
> @@ -493,8 +500,10 @@ static void use_inline_bio(struct dm_buf
> static void submit_io(struct dm_buffer *b, int rw, sector_t block,
> bio_end_io_t *end_io)
> {
> - if (b->c->block_size <= DM_BUFIO_INLINE_VECS * PAGE_SIZE &&
> - b->data_mode != DATA_MODE_VMALLOC)
> + if (rw == WRITE && b->c->write_callback)
> + b->c->write_callback(b);
> + if (likely(b->c->block_size <= DM_BUFIO_INLINE_VECS * PAGE_SIZE) &&
> + likely(b->data_mode != DATA_MODE_VMALLOC))
> use_inline_bio(b, rw, block, end_io);
> else
> use_dmio(b, rw, block, end_io);
> @@ -514,7 +523,7 @@ static void write_endio(struct bio *bio,
> {
> struct dm_buffer *b = container_of(bio, struct dm_buffer, bio);
> b->write_error = error;
> - if (error) {
> + if (unlikely(error)) {
> struct dm_bufio_client *c = b->c;
> (void)cmpxchg(&c->async_write_error, 0, error);
> }
> @@ -550,8 +559,6 @@ static void __write_dirty_buffer(struct
> clear_bit(B_DIRTY, &b->state);
> wait_on_bit_lock(&b->state, B_WRITING,
> do_io_schedule, TASK_UNINTERRUPTIBLE);
> - if (b->c->write_callback)
> - b->c->write_callback(b);
> submit_io(b, WRITE, b->block, write_endio);
> }
>
> @@ -572,7 +579,7 @@ static void __make_buffer_clean(struct d
>
> /*
> * Find some buffer that is not held by anybody, clean it, unlink it and
> - * return it. If "wait" is zero, try less hard and don't block.
> + * return it.
> */
> static struct dm_buffer *__get_unclaimed_buffer(struct dm_bufio_client *c)
> {
> @@ -585,6 +592,7 @@ static struct dm_buffer *__get_unclaimed
> __unlink_buffer(b);
> return b;
> }
> + dm_bufio_cond_resched();
> }
>
> list_for_each_entry_reverse(b, &c->lru[LIST_DIRTY], lru_list) {
> @@ -594,6 +602,7 @@ static struct dm_buffer *__get_unclaimed
> __unlink_buffer(b);
> return b;
> }
> + dm_bufio_cond_resched();
> }
> return NULL;
> }
> @@ -636,18 +645,21 @@ static struct dm_buffer *__alloc_buffer_
> * This is useful for debugging. When we set cache size to 1,
> * no new buffers are allocated at all.
> */
> - if (dm_bufio_cache_size_latch != 1) {
> + if (likely(dm_bufio_cache_size_latch != 1)) {
> /*
> - * dm-bufio is resistant to allocation failures (it just keeps
> - * one buffer reserved in cases all the allocations fail).
> + * dm-bufio is resistant to allocation failures (it
> + * just keeps one buffer reserved in case all the
> + * allocations fail).
> * So set flags to not try too hard:
> * GFP_NOIO: don't recurse into the I/O layer
> - * __GFP_NORETRY: don't retry and rather return failure
> + * __GFP_NORETRY: don't retry and rather return
> + * failure
> * __GFP_NOMEMALLOC: don't use emergency reserves
> - * __GFP_NOWARN: don't print a warning in case of failure
> + * __GFP_NOWARN: don't print a warning in case of
> + * failure
> */
> b = alloc_buffer(c, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
> - if (b)
> + if (likely(b != NULL))
> return b;
> }
>
> @@ -670,7 +682,7 @@ static struct dm_buffer *__alloc_buffer_
> static struct dm_buffer *__alloc_buffer_wait(struct dm_bufio_client *c)
> {
> struct dm_buffer *b = __alloc_buffer_wait_no_callback(c);
> - if (b && c->alloc_callback)
> + if (c->alloc_callback)
> c->alloc_callback(b);
> return b;
> }
> @@ -682,7 +694,7 @@ static void __free_buffer_wake(struct dm
> {
> struct dm_bufio_client *c = b->c;
>
> - if (c->need_reserved_buffers) {
> + if (unlikely(c->need_reserved_buffers != 0)) {
> list_add(&b->lru_list, &c->reserved_buffers);
> c->need_reserved_buffers--;
> } else
> @@ -704,6 +716,7 @@ static void __write_dirty_buffers_async(
> if (no_wait && test_bit(B_WRITING, &b->state))
> return;
> __write_dirty_buffer(b);
> + dm_bufio_cond_resched();
> }
> }
>
> @@ -716,7 +729,7 @@ static void __get_memory_limit(struct dm
> {
> unsigned long buffers;
>
> - if (dm_bufio_cache_size != dm_bufio_cache_size_latch) {
> + if (unlikely(dm_bufio_cache_size != dm_bufio_cache_size_latch)) {
> mutex_lock(&dm_bufio_clients_lock);
> __cache_size_refresh();
> mutex_unlock(&dm_bufio_clients_lock);
> @@ -724,7 +737,7 @@ static void __get_memory_limit(struct dm
>
> buffers = dm_bufio_cache_size_per_client >>
> (c->sectors_per_block_bits + SECTOR_SHIFT);
> - if (buffers < DM_BUFIO_MIN_BUFFERS)
> + if (unlikely(buffers < DM_BUFIO_MIN_BUFFERS))
> buffers = DM_BUFIO_MIN_BUFFERS;
> *limit_buffers = buffers;
> *threshold_buffers = buffers * DM_BUFIO_WRITEBACK_PERCENT / 100;
> @@ -747,6 +760,7 @@ static void __check_watermark(struct dm_
> if (!b)
> return;
> __free_buffer_wake(b);
> + dm_bufio_cond_resched();
> }
> if (c->n_buffers[LIST_DIRTY] > threshold_buffers)
> __write_dirty_buffers_async(c, 1);
> @@ -758,8 +772,9 @@ static struct dm_buffer *__find(struct d
> struct dm_buffer *b;
> struct hlist_node *hn;
> hlist_for_each_entry(b, hn, &c->cache_hash[DM_BUFIO_HASH(block)], hash_list) {
> - if (b->block == block)
> + if (likely(b->block == block))
> return b;
> + dm_bufio_cond_resched();
> }
>
> return NULL;
> @@ -775,12 +790,13 @@ enum new_flag {
> NF_GET = 2
> };
>
> -static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block, enum new_flag nf,
> - struct dm_buffer **bp, int *need_submit)
> +static void read_endio(struct bio *bio, int error);
> +
> +static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block,
> + enum new_flag nf, struct dm_buffer **bp)
> {
> struct dm_buffer *b, *new_b = NULL;
>
> - *need_submit = 0;
> b = __find(c, block);
> if (b) {
> b->hold_count++;
> @@ -821,7 +837,9 @@ static struct dm_buffer *__bufio_new(str
> }
>
> b->state = 1 << B_READING;
> - *need_submit = 1;
> +
> + submit_io(b, READ, b->block, read_endio);
> +
> return b;
> }
>
> @@ -849,22 +867,18 @@ static void read_endio(struct bio *bio,
> static void *new_read(struct dm_bufio_client *c, sector_t block, enum new_flag nf,
> struct dm_buffer **bp)
> {
> - int need_submit;
> struct dm_buffer *b;
>
> dm_bufio_lock(c);
> - b = __bufio_new(c, block, nf, bp, &need_submit);
> + b = __bufio_new(c, block, nf, bp);
> dm_bufio_unlock(c);
>
> - if (!b || IS_ERR(b))
> + if (unlikely(!b) || unlikely(IS_ERR(b)))
> return b;
> else {
> - if (need_submit)
> - submit_io(b, READ, b->block, read_endio);
> -
> wait_on_bit(&b->state, B_READING,
> do_io_schedule, TASK_UNINTERRUPTIBLE);
> - if (b->read_error != 0) {
> + if (unlikely(b->read_error != 0)) {
> int error = b->read_error;
> dm_bufio_release(b);
> return ERR_PTR(error);
> @@ -904,7 +918,7 @@ void dm_bufio_release(struct dm_buffer *
> BUG_ON(test_bit(B_READING, &b->state));
> BUG_ON(!b->hold_count);
> b->hold_count--;
> - if (!b->hold_count) {
> + if (likely(!b->hold_count)) {
> wake_up(&c->free_buffer_wait);
>
> /*
> @@ -912,7 +926,7 @@ void dm_bufio_release(struct dm_buffer *
> * to be written, free the buffer. There is no point in caching
> * invalid buffer.
> */
> - if ((b->read_error || b->write_error) &&
> + if (unlikely((b->read_error | b->write_error) != 0) &&
> !test_bit(B_WRITING, &b->state) &&
> !test_bit(B_DIRTY, &b->state)) {
> __unlink_buffer(b);
> @@ -999,15 +1013,19 @@ again:
> * someone is doing some writes simultaneously with us --- in
> * this case, stop dropping the lock.
> */
> + dm_bufio_cond_resched();
> if (dropped_lock)
> goto again;
> }
> wake_up(&c->free_buffer_wait);
> dm_bufio_unlock(c);
>
> - a = xchg(&c->async_write_error, 0);
> + if (likely(!c->async_write_error))
> + a = 0;
> + else
> + a = xchg(&c->async_write_error, 0);
> f = dm_bufio_issue_flush(c);
> - if (a)
> + if (unlikely(a != 0))
> return a;
> return f;
> }
> @@ -1071,7 +1089,7 @@ retry:
> BUG_ON(!b->hold_count);
> BUG_ON(test_bit(B_READING, &b->state));
> __write_dirty_buffer(b);
> - if (b->hold_count == 1) {
> + if (likely(b->hold_count == 1)) {
> wait_on_bit(&b->state, B_WRITING,
> do_io_schedule, TASK_UNINTERRUPTIBLE);
> set_bit(B_DIRTY, &b->state);
> @@ -1193,6 +1211,7 @@ static void __scan(struct dm_bufio_clien
> if (!--nr_to_scan)
> return;
> }
> + dm_bufio_cond_resched();
> }
> }
> }
> @@ -1406,9 +1425,11 @@ static void cleanup_old_buffers(void)
> struct dm_buffer, lru_list);
> if (__cleanup_old_buffer(b, 0, max_age))
> break;
> + dm_bufio_cond_resched();
> }
>
> dm_bufio_unlock(c);
> + dm_bufio_cond_resched();
> }
> mutex_unlock(&dm_bufio_clients_lock);
> }
> Index: linux-3.1-rc3-fast/drivers/md/dm-bufio.h
> ===================================================================
> --- linux-3.1-rc3-fast.orig/drivers/md/dm-bufio.h 2011-10-14 20:56:45.000000000 +0200
> +++ /dev/null 1970-01-01 00:00:00.000000000 +0000
> @@ -1,110 +0,0 @@
> -/*
> - * Copyright (C) 2009-2011 2011 Red Hat, Inc.
> - *
> - * This file is released under the GPL.
> - */
> -
> -#ifndef DM_BUFIO_H
> -#define DM_BUFIO_H
> -
> -#include <linux/blkdev.h>
> -#include <linux/types.h>
> -
> -/*----------------------------------------------------------------*/
> -
> -struct dm_bufio_client;
> -struct dm_buffer;
> -
> -/*
> - * Create a buffered IO cache on a given device
> - */
> -struct dm_bufio_client *
> -dm_bufio_client_create(struct block_device *bdev, unsigned block_size,
> - unsigned reserved_buffers, unsigned aux_size,
> - void (*alloc_callback)(struct dm_buffer *),
> - void (*write_callback)(struct dm_buffer *));
> -
> -/*
> - * Release a buffered IO cache.
> - */
> -void dm_bufio_client_destroy(struct dm_bufio_client *c);
> -
> -/*
> - * WARNING: to avoid deadlocks, these conditions are observed:
> - *
> - * - At most one thread can hold at most "reserved_buffers" simultaneously.
> - * - Each other threads can hold at most one buffer.
> - * - Threads which call only dm_bufio_get can hold unlimited number of
> - * buffers.
> - */
> -
> -/*
> - * Read a given block from disk. Returns pointer to data. Returns a
> - * pointer to dm_buffer that can be used to release the buffer or to make
> - * it dirty.
> - */
> -void *dm_bufio_read(struct dm_bufio_client *c, sector_t block,
> - struct dm_buffer **bp);
> -
> -/*
> - * Like dm_bufio_read, but return buffer from cache, don't read
> - * it. If the buffer is not in the cache, return NULL.
> - */
> -void *dm_bufio_get(struct dm_bufio_client *c, sector_t block,
> - struct dm_buffer **bp);
> -
> -/*
> - * Like dm_bufio_read, but don't read anything from the disk. It is
> - * expected that the caller initializes the buffer and marks it dirty.
> - */
> -void *dm_bufio_new(struct dm_bufio_client *c, sector_t block,
> - struct dm_buffer **bp);
> -
> -/*
> - * Release a reference obtained with dm_bufio_{read,get,new}. The data
> - * pointer and dm_buffer pointer is no longer valid after this call.
> - */
> -void dm_bufio_release(struct dm_buffer *b);
> -
> -/*
> - * Mark a buffer dirty. It should be called after the buffer is modified.
> - *
> - * In case of memory pressure, the buffer may be written after
> - * dm_bufio_mark_buffer_dirty, but before dm_bufio_write_dirty_buffers. So
> - * dm_bufio_write_dirty_buffers guarantees that the buffer is on-disk but
> - * the actual writing may occur earlier.
> - */
> -void dm_bufio_mark_buffer_dirty(struct dm_buffer *b);
> -
> -/*
> - * Initiate writing of dirty buffers, without waiting for completion.
> - */
> -void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c);
> -
> -/*
> - * Write all dirty buffers. Guarantees that all dirty buffers created prior
> - * to this call are on disk when this call exits.
> - */
> -int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c);
> -
> -/*
> - * Send an empty write barrier to the device to flush hardware disk cache.
> - */
> -int dm_bufio_issue_flush(struct dm_bufio_client *c);
> -
> -/*
> - * Like dm_bufio_release but also move the buffer to the new
> - * block. dm_bufio_write_dirty_buffers is needed to commit the new block.
> - */
> -void dm_bufio_release_move(struct dm_buffer *b, sector_t new_block);
> -
> -unsigned dm_bufio_get_block_size(struct dm_bufio_client *c);
> -sector_t dm_bufio_get_device_size(struct dm_bufio_client *c);
> -sector_t dm_bufio_get_block_number(struct dm_buffer *b);
> -void *dm_bufio_get_block_data(struct dm_buffer *b);
> -void *dm_bufio_get_aux_data(struct dm_buffer *b);
> -struct dm_bufio_client *dm_bufio_get_client(struct dm_buffer *b);
> -
> -/*----------------------------------------------------------------*/
> -
> -#endif
> Index: linux-3.1-rc3-fast/include/linux/dm-bufio.h
> ===================================================================
> --- /dev/null 1970-01-01 00:00:00.000000000 +0000
> +++ linux-3.1-rc3-fast/include/linux/dm-bufio.h 2011-10-14 20:56:46.000000000 +0200
> @@ -0,0 +1,110 @@
> +/*
> + * Copyright (C) 2009-2011 Red Hat, Inc.
> + *
> + * This file is released under the GPL.
> + */
> +
> +#ifndef DM_BUFIO_H
> +#define DM_BUFIO_H
> +
> +#include <linux/blkdev.h>
> +#include <linux/types.h>
> +
> +/*----------------------------------------------------------------*/
> +
> +struct dm_bufio_client;
> +struct dm_buffer;
> +
> +/*
> + * Create a buffered IO cache on a given device
> + */
> +struct dm_bufio_client *
> +dm_bufio_client_create(struct block_device *bdev, unsigned block_size,
> + unsigned reserved_buffers, unsigned aux_size,
> + void (*alloc_callback)(struct dm_buffer *),
> + void (*write_callback)(struct dm_buffer *));
> +
> +/*
> + * Release a buffered IO cache.
> + */
> +void dm_bufio_client_destroy(struct dm_bufio_client *c);
> +
> +/*
> + * WARNING: to avoid deadlocks, these conditions are observed:
> + *
> + * - At most one thread can hold at most "reserved_buffers" simultaneously.
> + * - Each other threads can hold at most one buffer.
> + * - Threads which call only dm_bufio_get can hold unlimited number of
> + * buffers.
> + */
> +
> +/*
> + * Read a given block from disk. Returns pointer to data. Returns a
> + * pointer to dm_buffer that can be used to release the buffer or to make
> + * it dirty.
> + */
> +void *dm_bufio_read(struct dm_bufio_client *c, sector_t block,
> + struct dm_buffer **bp);
> +
> +/*
> + * Like dm_bufio_read, but return buffer from cache, don't read
> + * it. If the buffer is not in the cache, return NULL.
> + */
> +void *dm_bufio_get(struct dm_bufio_client *c, sector_t block,
> + struct dm_buffer **bp);
> +
> +/*
> + * Like dm_bufio_read, but don't read anything from the disk. It is
> + * expected that the caller initializes the buffer and marks it dirty.
> + */
> +void *dm_bufio_new(struct dm_bufio_client *c, sector_t block,
> + struct dm_buffer **bp);
> +
> +/*
> + * Release a reference obtained with dm_bufio_{read,get,new}. The data
> + * pointer and dm_buffer pointer is no longer valid after this call.
> + */
> +void dm_bufio_release(struct dm_buffer *b);
> +
> +/*
> + * Mark a buffer dirty. It should be called after the buffer is modified.
> + *
> + * In case of memory pressure, the buffer may be written after
> + * dm_bufio_mark_buffer_dirty, but before dm_bufio_write_dirty_buffers. So
> + * dm_bufio_write_dirty_buffers guarantees that the buffer is on-disk but
> + * the actual writing may occur earlier.
> + */
> +void dm_bufio_mark_buffer_dirty(struct dm_buffer *b);
> +
> +/*
> + * Initiate writing of dirty buffers, without waiting for completion.
> + */
> +void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c);
> +
> +/*
> + * Write all dirty buffers. Guarantees that all dirty buffers created prior
> + * to this call are on disk when this call exits.
> + */
> +int dm_bufio_write_dirty_buffers(struct dm_bufio_client *c);
> +
> +/*
> + * Send an empty write barrier to the device to flush hardware disk cache.
> + */
> +int dm_bufio_issue_flush(struct dm_bufio_client *c);
> +
> +/*
> + * Like dm_bufio_release but also move the buffer to the new
> + * block. dm_bufio_write_dirty_buffers is needed to commit the new block.
> + */
> +void dm_bufio_release_move(struct dm_buffer *b, sector_t new_block);
> +
> +unsigned dm_bufio_get_block_size(struct dm_bufio_client *c);
> +sector_t dm_bufio_get_device_size(struct dm_bufio_client *c);
> +sector_t dm_bufio_get_block_number(struct dm_buffer *b);
> +void *dm_bufio_get_block_data(struct dm_buffer *b);
> +void *dm_bufio_get_aux_data(struct dm_buffer *b);
> +struct dm_bufio_client *dm_bufio_get_client(struct dm_buffer *b);
> +
> +/*----------------------------------------------------------------*/
> +
> +#endif
More information about the dm-devel
mailing list