[dm-devel] [PATCH] dm statistics
Mikulas Patocka
mpatocka at redhat.com
Thu Jan 24 20:10:58 UTC 2013
Hi
Here I'm sending the patch for dm statistics.
(it depends on the RCU patch, but it could be trivially fixed to apply
without the RCU patch - see functions dm_internal_suspend and
dm_internal_resume).
---
Documentation/device-mapper/dm-statistics.txt | 44 ++
drivers/md/Makefile | 2
drivers/md/dm-ioctl.c | 144 ++++++++
drivers/md/dm-stats.c | 430 ++++++++++++++++++++++++++
drivers/md/dm-stats.h | 38 ++
drivers/md/dm.c | 48 ++
drivers/md/dm.h | 8
include/uapi/linux/dm-ioctl.h | 5
8 files changed, 716 insertions(+), 3 deletions(-)
Index: linux-3.8-rc4-fast/drivers/md/dm-ioctl.c
===================================================================
--- linux-3.8-rc4-fast.orig/drivers/md/dm-ioctl.c 2013-01-24 20:55:22.000000000 +0100
+++ linux-3.8-rc4-fast/drivers/md/dm-ioctl.c 2013-01-24 20:55:55.000000000 +0100
@@ -1451,6 +1451,141 @@ static int table_status(struct dm_ioctl
return 0;
}
+/*
+ * Context for accumulating @stats_* message output into the ioctl
+ * result buffer (filled in by dm_output_message_string).
+ */
+struct dm_message_output_callback {
+ struct dm_ioctl *param; /* ioctl parameter block / result buffer */
+ size_t param_size; /* total size of the buffer in bytes */
+};
+
+/*
+ * Append "string" to the message output area of the ioctl result buffer.
+ * On the first call the output area is initialized to an empty string
+ * after the header and DM_MESSAGE_OUT_FLAG is set. Returns 0 on success;
+ * returns -1 and sets DM_BUFFER_FULL_FLAG when the buffer is (or becomes)
+ * too small, so the caller can stop producing output.
+ */
+static int dm_output_message_string(struct dm_message_output_callback *c,
+ const char *string)
+{
+ size_t len;
+ char *p;
+ if (c->param->flags & DM_BUFFER_FULL_FLAG)
+ return -1;
+ if (!(c->param->flags & DM_MESSAGE_OUT_FLAG)) {
+ /* first output: start with an empty NUL-terminated string */
+ p = get_result_buffer(c->param, c->param_size, &len);
+ if (!len) {
+ c->param->flags |= DM_BUFFER_FULL_FLAG;
+ return -1;
+ }
+ *p = 0;
+ c->param->data_size = c->param->data_start + 1;
+ c->param->flags |= DM_MESSAGE_OUT_FLAG;
+ }
+ /* append on top of the terminating NUL of the output so far */
+ p = (char *)c->param + c->param->data_size - 1;
+ len = strlen(string);
+ if (c->param->data_size + len > c->param_size) {
+ c->param->flags |= DM_BUFFER_FULL_FLAG;
+ c->param->flags &= ~DM_MESSAGE_OUT_FLAG;
+ return -1;
+ }
+ c->param->data_size += len;
+ strcpy(p, string);
+ return 0;
+}
+
+/*
+ * Process messages handled by the device mapper core itself (the
+ * @stats_* family) rather than by a target.
+ * Returns a number <= 0 if the message was processed by device mapper
+ * (0 on success, a negative errno on failure).
+ * Returns 1 if the message should be delivered to the target.
+ */
+static int message_for_md(struct mapped_device *md,
+			  struct dm_message_output_callback *c,
+			  unsigned argc, char **argv)
+{
+	int id;
+	char dummy;
+	if (!strcasecmp(argv[0], "@stats_create")) {
+		unsigned long long start, end, step;
+		unsigned div;
+		char id_string[11];	/* INT_MAX has 10 digits + NUL */
+
+		if (dm_request_based(md))
+			goto no_rq_based_stats;
+
+		if (argc != 3)
+			goto invalid_message;
+
+		/* "-" means the whole device */
+		if (!strcmp(argv[1], "-")) {
+			start = 0;
+			end = dm_get_size(md);
+			if (!end)
+				end = 1;
+		} else if (sscanf(argv[1], "%llu-%llu%c", &start, &end, &dummy) != 2 ||
+			   start != (sector_t)start || end != (sector_t)end)
+			goto invalid_message;
+
+		if (start >= end)
+			goto invalid_message;
+
+		if (sscanf(argv[2], "/%u%c", &div, &dummy) == 1) {
+			/*
+			 * "/<n>" subdivides the range into n areas.
+			 * Reject n == 0, which would make do_div below
+			 * divide by zero.
+			 */
+			if (!div)
+				goto invalid_message;
+			step = end - start;
+			if (do_div(step, div))
+				step++;
+			if (!step)
+				step = 1;
+		} else if (sscanf(argv[2], "%llu%c", &step, &dummy) != 1 || step != (sector_t)step || !step)
+			goto invalid_message;
+
+		/*
+		 * Suspend/resume to make sure there is no i/o in flight, so that newly
+		 * created statistics will be exact.
+		 */
+		dm_internal_suspend(md);
+		id = dm_stats_create(dm_get_stats(md), start, end, step);
+		dm_internal_resume(md);
+
+		if (id < 0)
+			return id;
+
+		/* report the new region id to the caller */
+		snprintf(id_string, sizeof id_string, "%d", id);
+		dm_output_message_string(c, id_string);
+
+		return 0;
+	} else if (!strcasecmp(argv[0], "@stats_delete")) {
+		if (dm_request_based(md))
+			goto no_rq_based_stats;
+
+		if (argc != 2)
+			goto invalid_message;
+
+		if (sscanf(argv[1], "%d%c", &id, &dummy) != 1 || id < 0)
+			goto invalid_message;
+
+		return dm_stats_delete(dm_get_stats(md), id);
+	} else if (!strcasecmp(argv[0], "@stats_print")) {
+		if (dm_request_based(md))
+			goto no_rq_based_stats;
+
+		if (argc != 2)
+			goto invalid_message;
+		if (sscanf(argv[1], "%d%c", &id, &dummy) != 1 || id < 0)
+			goto invalid_message;
+		return dm_stats_print(dm_get_stats(md), id, false, c,
+				      dm_output_message_string);
+	} else if (!strcasecmp(argv[0], "@stats_print_clear")) {
+		if (dm_request_based(md))
+			goto no_rq_based_stats;
+
+		if (argc != 2)
+			goto invalid_message;
+		if (sscanf(argv[1], "%d%c", &id, &dummy) != 1 || id < 0)
+			goto invalid_message;
+		return dm_stats_print(dm_get_stats(md), id, true, c,
+				      dm_output_message_string);
+	}
+	return 1;	/* not a core message: pass it to the target */
+
+no_rq_based_stats:
+	DMWARN("Statistics are only supported for bio based devices");
+	return -EOPNOTSUPP;
+
+invalid_message:
+	DMWARN("Invalid parameters for message %s", argv[0]);
+	return -EINVAL;
+}
+
/*
* Pass a message to the target that's at the supplied device offset.
*/
@@ -1463,6 +1598,7 @@ static int target_message(struct dm_ioct
struct dm_target *ti;
struct dm_target_msg *tmsg = (void *) param + param->data_start;
int srcu_idx;
+ struct dm_message_output_callback c = { param, param_size };
md = find_device(param);
if (!md)
@@ -1486,6 +1622,10 @@ static int target_message(struct dm_ioct
goto out_argv;
}
+ r = message_for_md(md, &c, argc, argv);
+ if (r <= 0)
+ goto out_argv;
+
table = dm_get_live_table(md, &srcu_idx);
if (!table)
goto out_table;
@@ -1511,7 +1651,8 @@ static int target_message(struct dm_ioct
out_argv:
kfree(argv);
out:
- param->data_size = 0;
+ if (!(param->flags & (DM_MESSAGE_OUT_FLAG | DM_BUFFER_FULL_FLAG)))
+ param->data_size = 0;
dm_put(md);
return r;
}
@@ -1685,6 +1826,7 @@ static int validate_params(uint cmd, str
param->flags &= ~DM_BUFFER_FULL_FLAG;
param->flags &= ~DM_UEVENT_GENERATED_FLAG;
param->flags &= ~DM_SECURE_DATA_FLAG;
+ param->flags &= ~DM_MESSAGE_OUT_FLAG;
/* Ignores parameters */
if (cmd == DM_REMOVE_ALL_CMD ||
Index: linux-3.8-rc4-fast/include/uapi/linux/dm-ioctl.h
===================================================================
--- linux-3.8-rc4-fast.orig/include/uapi/linux/dm-ioctl.h 2013-01-24 20:55:22.000000000 +0100
+++ linux-3.8-rc4-fast/include/uapi/linux/dm-ioctl.h 2013-01-24 20:55:55.000000000 +0100
@@ -336,4 +336,9 @@ enum {
*/
#define DM_SECURE_DATA_FLAG (1 << 15) /* In */
+/*
+ * If set, message generated output.
+ */
+#define DM_MESSAGE_OUT_FLAG (1 << 16) /* Out */
+
#endif /* _LINUX_DM_IOCTL_H */
Index: linux-3.8-rc4-fast/drivers/md/Makefile
===================================================================
--- linux-3.8-rc4-fast.orig/drivers/md/Makefile 2013-01-24 20:55:19.000000000 +0100
+++ linux-3.8-rc4-fast/drivers/md/Makefile 2013-01-24 20:55:55.000000000 +0100
@@ -3,7 +3,7 @@
#
dm-mod-y += dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
- dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o
+ dm-ioctl.o dm-stats.o dm-io.o dm-kcopyd.o dm-sysfs.o
dm-multipath-y += dm-path-selector.o dm-mpath.o
dm-snapshot-y += dm-snap.o dm-exception-store.o dm-snap-transient.o \
dm-snap-persistent.o
Index: linux-3.8-rc4-fast/drivers/md/dm-stats.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-3.8-rc4-fast/drivers/md/dm-stats.c 2013-01-24 20:55:55.000000000 +0100
@@ -0,0 +1,430 @@
+#include <linux/errno.h>
+#include <linux/numa.h>
+#include <linux/slab.h>
+#include <linux/rculist.h>
+#include <linux/threads.h>
+#include <linux/preempt.h>
+#include <linux/irqflags.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/bio.h>
+
+#include "dm-stats.h"
+
+/*
+ * Set when a dm_stat was freed via call_rcu; dm_stats_exit then needs
+ * an rcu_barrier to wait for outstanding callbacks.
+ */
+static volatile int dm_stat_need_rcu_barrier;
+
+/*
+ * Per-cpu counters for one area. In each pair, index 0 counts reads
+ * and index 1 counts writes (see dm_stat_for_entry).
+ */
+struct dm_stat_percpu {
+ unsigned long sectors[2];
+ unsigned long ios[2];
+ unsigned long ticks[2];
+ unsigned long io_ticks;
+ unsigned long time_in_queue;
+};
+
+/*
+ * Cross-cpu state for one area: in-flight counters, the jiffies stamp
+ * of the last dm_stat_round, and a scratch area used while printing.
+ */
+struct dm_stat_shared {
+ atomic_t in_flight[2];
+ unsigned long stamp;
+ struct dm_stat_percpu tmp;
+};
+
+/*
+ * One statistics region covering sectors [start, end), subdivided into
+ * n_entries areas of "step" sectors. Linked on dm_stats.list in id
+ * order; freed through rcu_head once RCU readers are done.
+ */
+struct dm_stat {
+ struct list_head list_entry;
+ int id;
+ size_t n_entries;
+ sector_t start;
+ sector_t end;
+ sector_t step;
+ struct rcu_head rcu_head;
+ struct dm_stat_percpu *stat_percpu[NR_CPUS];
+ struct dm_stat_shared stat_shared[0]; /* trailing array, n_entries long */
+};
+
+/*
+ * Allocate zeroed memory on the given node: try kmalloc first (quietly,
+ * without retrying or dipping into reserves) and fall back to vzalloc
+ * when the size is too big or kmalloc fails.
+ */
+static void *kvzalloc(size_t alloc_size, int node)
+{
+ void *p;
+ if (alloc_size <= KMALLOC_MAX_SIZE) {
+ p = kzalloc_node(alloc_size, GFP_KERNEL | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN, node);
+ if (p)
+ return p;
+ }
+ return vzalloc_node(alloc_size, node);
+}
+
+/*
+ * Free memory obtained from kvzalloc, whichever allocator provided it.
+ */
+static void kvfree(void *ptr)
+{
+ if (is_vmalloc_addr(ptr))
+ vfree(ptr);
+ else
+ kfree(ptr);
+}
+
+/*
+ * Free a dm_stat and all its per-cpu counter arrays. Used both as an
+ * RCU callback and called directly (via its rcu_head) on teardown paths.
+ */
+static void dm_stat_free(struct rcu_head *head)
+{
+ struct dm_stat *m = container_of(head, struct dm_stat, rcu_head);
+ int cpu;
+ for_each_possible_cpu(cpu)
+ kvfree(m->stat_percpu[cpu]);
+ kvfree(m);
+}
+
+/*
+ * Total number of in-flight ios (reads + writes) for one area.
+ */
+static int dm_stat_in_flight(struct dm_stat_shared *s)
+{
+ return atomic_read(&s->in_flight[0]) + atomic_read(&s->in_flight[1]);
+}
+
+/*
+ * Initialize the per-device statistics bookkeeping.
+ */
+void dm_stats_init_device(struct dm_stats *st)
+{
+ mutex_init(&st->mutex);
+ INIT_LIST_HEAD(&st->list);
+}
+
+/*
+ * Free all statistics regions of a device being torn down. No i/o may
+ * be in flight at this point; a nonzero in-flight counter means the
+ * accounting leaked somewhere, so it is reported and we BUG.
+ */
+void dm_stats_exit_device(struct dm_stats *st)
+{
+ size_t ni;
+ while (!list_empty(&st->list)) {
+ struct dm_stat *m = container_of(st->list.next, struct dm_stat, list_entry);
+ list_del(&m->list_entry);
+ for (ni = 0; ni < m->n_entries; ni++) {
+ struct dm_stat_shared *s = &m->stat_shared[ni];
+ if (dm_stat_in_flight(s)) {
+ printk(KERN_CRIT "dm-stats: leaked in-flight counter at index %lu (start %llu, end %llu, step %llu): reads %d, writes %d\n",
+ (unsigned long)ni,
+ (unsigned long long)m->start,
+ (unsigned long long)m->end,
+ (unsigned long long)m->step,
+ atomic_read(&s->in_flight[0]),
+ atomic_read(&s->in_flight[1])
+ );
+ BUG();
+ }
+ }
+ dm_stat_free(&m->rcu_head);
+ }
+}
+
+/*
+ * Create a statistics region covering sectors [start, end), subdivided
+ * into areas of "step" sectors. Returns the new non-negative region id,
+ * or a negative errno (-EINVAL, -EOVERFLOW, -ENOMEM, -ENFILE).
+ */
+int dm_stats_create(struct dm_stats *st, sector_t start, sector_t end, sector_t step)
+{
+	struct list_head *l;
+	struct dm_stat *s;
+	sector_t n_entries;
+	size_t ni;
+	size_t shared_alloc_size;
+	size_t percpu_alloc_size;
+	int cpu;
+	int ret_id;
+
+	if (end < start || !step)
+		return -EINVAL;
+
+	/* number of step-sized areas, rounding up */
+	n_entries = end - start;
+	if (sector_div(n_entries, step))
+		n_entries++;
+
+	/* the count must fit in size_t and must not be the all-ones value */
+	if (n_entries != (size_t)n_entries || !(n_entries + 1))
+		return -EOVERFLOW;
+
+	/* overflow-checked allocation sizes */
+	shared_alloc_size = sizeof(struct dm_stat) + (size_t)n_entries * sizeof(struct dm_stat_shared);
+	if ((shared_alloc_size - sizeof(struct dm_stat)) / sizeof(struct dm_stat_shared) != n_entries)
+		return -EOVERFLOW;
+
+	percpu_alloc_size = (size_t)n_entries * sizeof(struct dm_stat_percpu);
+	if (percpu_alloc_size / sizeof(struct dm_stat_percpu) != n_entries)
+		return -EOVERFLOW;
+
+	s = kvzalloc(shared_alloc_size, NUMA_NO_NODE);
+	if (!s)
+		return -ENOMEM;
+
+	s->n_entries = n_entries;
+	s->start = start;
+	s->end = end;
+	s->step = step;
+	s->id = 0;
+
+	for (ni = 0; ni < n_entries; ni++) {
+		atomic_set(&s->stat_shared[ni].in_flight[0], 0);
+		atomic_set(&s->stat_shared[ni].in_flight[1], 0);
+	}
+
+	for_each_possible_cpu(cpu) {
+		struct dm_stat_percpu *pc = kvzalloc(percpu_alloc_size, cpu_to_node(cpu));
+		if (!pc) {
+			/* frees s and the per-cpu buffers allocated so far */
+			dm_stat_free(&s->rcu_head);
+			return -ENOMEM;
+		}
+		s->stat_percpu[cpu] = pc;
+	}
+
+	/*
+	 * Find the lowest unused id, keeping the list sorted by id so the
+	 * scan below and dm_stats_find can stop early.
+	 */
+	mutex_lock(&st->mutex);
+	list_for_each(l, &st->list) {
+		struct dm_stat *m = container_of(l, struct dm_stat, list_entry);
+		if (m->id < s->id)
+			BUG();
+		if (m->id > s->id)
+			break;
+		if (s->id == INT_MAX) {
+			mutex_unlock(&st->mutex);
+			/*
+			 * All ids are taken: free the structure we built,
+			 * otherwise it (and its per-cpu buffers) would leak.
+			 */
+			dm_stat_free(&s->rcu_head);
+			return -ENFILE;
+		}
+		s->id++;
+	}
+	ret_id = s->id;
+	list_add_tail_rcu(&s->list_entry, l);
+	mutex_unlock(&st->mutex);
+
+	return ret_id;
+}
+
+/*
+ * Look up a region by id. On success, returns the region with
+ * st->mutex HELD - the caller must drop it. On failure, returns NULL
+ * with the mutex released.
+ */
+static struct dm_stat *dm_stats_find(struct dm_stats *st, int id)
+{
+ struct dm_stat *m;
+
+ mutex_lock(&st->mutex);
+
+ /* the list is sorted by id, so we can stop at the first larger id */
+ list_for_each_entry(m, &st->list, list_entry) {
+ if (m->id > id)
+ break;
+ if (m->id == id)
+ return m;
+ }
+
+ mutex_unlock(&st->mutex);
+
+ return NULL;
+}
+
+/*
+ * Delete the region with the given id. Returns 0 or -ENOENT.
+ */
+int dm_stats_delete(struct dm_stats *st, int id)
+{
+	struct dm_stat *m;
+	int cpu;
+
+	m = dm_stats_find(st, id);
+	if (!m)
+		return -ENOENT;
+
+	/* dm_stats_find returned with st->mutex held */
+	list_del_rcu(&m->list_entry);
+	mutex_unlock(&st->mutex);
+
+	/*
+	 * vfree can't be called from RCU callback, so if any of the
+	 * allocations is vmalloc-backed we must wait for the grace period
+	 * synchronously and free directly. Check each per-cpu buffer
+	 * individually - they are allocated separately from the shared
+	 * structure and may use a different allocator.
+	 */
+	for_each_possible_cpu(cpu)
+		if (is_vmalloc_addr(m->stat_percpu[cpu]))
+			goto do_sync_free;
+	if (is_vmalloc_addr(m)) {
+do_sync_free:
+		synchronize_rcu_expedited();
+		dm_stat_free(&m->rcu_head);
+	} else {
+		/* everything is kmalloc'ed: deferred free is safe */
+		dm_stat_need_rcu_barrier = 1;
+		call_rcu(&m->rcu_head, dm_stat_free);
+	}
+	return 0;
+}
+
+/*
+ * Fold the jiffies elapsed since the last stamp into io_ticks and
+ * time_in_queue, like part_round_stats_single does for gendisks.
+ */
+static void dm_stat_round(struct dm_stat_shared *s, struct dm_stat_percpu *p)
+{
+ /*
+ * This is racy, but so is part_round_stats_single.
+ */
+ unsigned long now = jiffies;
+ unsigned inf;
+ if (now == s->stamp)
+ return;
+ inf = dm_stat_in_flight(s);
+ if (inf) {
+ p->io_ticks += now - s->stamp;
+ p->time_in_queue += inf * (now - s->stamp);
+ }
+ s->stamp = now;
+}
+
+/*
+ * Account one bio fragment in area "entry": at start (end == false)
+ * bump the in-flight counter; at completion (end == true) drop it and
+ * add the sectors/ios/ticks counters. idx selects the read (0) or
+ * write (1) half of each counter pair.
+ */
+static void dm_stat_for_entry(struct dm_stat *m, size_t entry,
+ unsigned long bi_rw, unsigned len, bool end,
+ unsigned long duration)
+{
+ unsigned long idx = bi_rw & REQ_WRITE;
+ struct dm_stat_shared *s = &m->stat_shared[entry];
+ struct dm_stat_percpu *p;
+
+ /*
+ * For strict correctness we should use local_irq_disable/enable
+ * instead of preempt_disable/enable.
+ *
+ * This is racy if the driver finishes bios from non-interrupt
+ * context as well as from interrupt context or from more different
+ * interrupts.
+ *
+ * However, the race only results in not counting some events,
+ * so it is acceptable.
+ *
+ * part_stat_lock()/part_stat_unlock() have this race too.
+ */
+ preempt_disable();
+ p = &m->stat_percpu[smp_processor_id()][entry];
+
+ if (!end) {
+ dm_stat_round(s, p);
+ atomic_inc(&s->in_flight[idx]);
+ } else {
+ dm_stat_round(s, p);
+ atomic_dec(&s->in_flight[idx]);
+ p->sectors[idx] += len;
+ p->ios[idx] += 1;
+ p->ticks[idx] += duration;
+ }
+
+ preempt_enable();
+}
+
+/*
+ * Bios that carry no data (bi_size == 0) are not accounted.
+ */
+static bool dm_stats_should_drop_bio(struct bio *bio)
+{
+ return !bio->bi_size;
+}
+
+/*
+ * Account a bio event in every statistics region it intersects,
+ * splitting the accounted length across the step-sized areas it spans.
+ * end == false records the start of the i/o, end == true its completion
+ * ("duration" in jiffies). The region list is walked under RCU so this
+ * can run concurrently with region creation and deletion.
+ */
+void dm_stats_bio(struct dm_stats *st, struct bio *bio, bool end,
+ unsigned long duration)
+{
+ struct dm_stat *m;
+ sector_t end_sector;
+
+ if (unlikely(dm_stats_should_drop_bio(bio)))
+ return;
+
+ end_sector = bio->bi_sector + bio_sectors(bio);
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(m, &st->list, list_entry) {
+ sector_t rel_sector, offset;
+ unsigned todo;
+ size_t entry;
+ /* skip regions the bio does not touch */
+ if (end_sector <= m->start || bio->bi_sector >= m->end)
+ continue;
+ /* clip the bio to the region boundaries */
+ if (unlikely(bio->bi_sector < m->start)) {
+ rel_sector = 0;
+ todo = end_sector - m->start;
+ } else {
+ rel_sector = bio->bi_sector - m->start;
+ todo = end_sector - bio->bi_sector;
+ }
+ if (unlikely(end_sector > m->end))
+ todo -= end_sector - m->end;
+ /* sector_div leaves the area index in rel_sector, the offset
+ within that area in "offset" */
+ offset = sector_div(rel_sector, m->step);
+ entry = rel_sector;
+ do {
+ unsigned fragment_len;
+ BUG_ON(entry >= m->n_entries);
+ /* account at most up to the end of the current area */
+ fragment_len = todo;
+ if (fragment_len > m->step - offset)
+ fragment_len = m->step - offset;
+ dm_stat_for_entry(m, entry, bio->bi_rw, fragment_len,
+ end, duration);
+ todo -= fragment_len;
+ entry++;
+ offset = 0;
+ } while (unlikely(todo != 0));
+ }
+
+ rcu_read_unlock();
+}
+
+/*
+ * Emit one line per area of region "id" through "callback", in the
+ * diskstats-like format described in
+ * Documentation/device-mapper/dm-statistics.txt. If "clear" is set,
+ * the printed counters are subtracted again so the totals restart from
+ * zero (in-flight counts are not cleared). Returns 0 or -ENOENT;
+ * output truncation is signalled through DM_BUFFER_FULL_FLAG, which the
+ * callback sets.
+ */
+int dm_stats_print(struct dm_stats *st, int id, bool clear,
+ struct dm_message_output_callback *c,
+ int (*callback)(struct dm_message_output_callback *, const char *))
+{
+ struct dm_stat *m;
+ size_t x;
+ sector_t start, end;
+
+ m = dm_stats_find(st, id);
+ if (!m)
+ return -ENOENT;
+
+ /* dm_stats_find succeeded: st->mutex is held until the unlock below */
+ start = m->start;
+
+ for (x = 0; x < m->n_entries; x++, start = end) {
+ int cpu;
+ struct dm_stat_shared *s = &m->stat_shared[x];
+ struct dm_stat_percpu *p;
+ /* worst-case length of one formatted output line */
+ const int LD = sizeof(unsigned long) > 4 ? 20 : 10;
+ const int SD = sizeof(sector_t) > 4 ? 20 : 10;
+ char out_string[SD+1+SD+1+LD+3+LD+1+LD+1+LD+3+LD+1+LD+1+10+1+LD+1+LD+2];
+
+ end = start + m->step;
+ if (unlikely(end > m->end))
+ end = m->end;
+
+ /* fold elapsed in-flight time into this cpu's counters */
+ local_irq_disable();
+ p = &m->stat_percpu[smp_processor_id()][x];
+ dm_stat_round(s, p);
+ local_irq_enable();
+
+ /* sum the per-cpu counters into the s->tmp snapshot */
+ memset(&s->tmp, 0, sizeof s->tmp);
+ for_each_possible_cpu(cpu) {
+ p = &m->stat_percpu[cpu][x];
+ s->tmp.sectors[0] += p->sectors[0];
+ s->tmp.sectors[1] += p->sectors[1];
+ s->tmp.ios[0] += p->ios[0];
+ s->tmp.ios[1] += p->ios[1];
+ s->tmp.ticks[0] += p->ticks[0];
+ s->tmp.ticks[1] += p->ticks[1];
+ s->tmp.io_ticks += p->io_ticks;
+ s->tmp.time_in_queue += p->time_in_queue;
+ }
+
+ /* the 0U fields are the merge counters, always zero for dm */
+ snprintf(out_string, sizeof(out_string),
+ "%llu-%llu %lu %u %lu %lu %lu %u %lu %lu %d %lu %lu\n",
+ (unsigned long long)start,
+ (unsigned long long)end,
+ s->tmp.ios[0],
+ 0U,
+ s->tmp.sectors[0],
+ s->tmp.ticks[0],
+ s->tmp.ios[1],
+ 0U,
+ s->tmp.sectors[1],
+ s->tmp.ticks[1],
+ dm_stat_in_flight(s),
+ s->tmp.io_ticks,
+ s->tmp.time_in_queue
+ );
+ if (callback(c, out_string))
+ goto buffer_overflow;
+ }
+
+ if (clear) {
+ /*
+ * Subtract the printed snapshot from the local cpu's counters
+ * so the per-area totals restart from zero. The atomic
+ * in-flight counters are deliberately left untouched.
+ */
+ for (x = 0; x < m->n_entries; x++) {
+ struct dm_stat_shared *s = &m->stat_shared[x];
+ struct dm_stat_percpu *p;
+ local_irq_disable();
+ p = &m->stat_percpu[smp_processor_id()][x];
+ p->sectors[0] -= s->tmp.sectors[0];
+ p->sectors[1] -= s->tmp.sectors[1];
+ p->ios[0] -= s->tmp.ios[0];
+ p->ios[1] -= s->tmp.ios[1];
+ p->ticks[0] -= s->tmp.ticks[0];
+ p->ticks[1] -= s->tmp.ticks[1];
+ p->io_ticks -= s->tmp.io_ticks;
+ p->time_in_queue -= s->tmp.time_in_queue;
+ local_irq_enable();
+ }
+ }
+
+buffer_overflow:
+ mutex_unlock(&st->mutex);
+
+ return 0;
+}
+
+/*
+ * Module init: no RCU-deferred frees are outstanding yet.
+ */
+int __init dm_stats_init(void)
+{
+ dm_stat_need_rcu_barrier = 0;
+ return 0;
+}
+
+/*
+ * Module exit: if any dm_stat was freed via call_rcu, wait for the
+ * callbacks to finish before the module text goes away.
+ */
+void dm_stats_exit(void)
+{
+ if (dm_stat_need_rcu_barrier)
+ rcu_barrier();
+}
Index: linux-3.8-rc4-fast/drivers/md/dm-stats.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-3.8-rc4-fast/drivers/md/dm-stats.h 2013-01-24 20:55:55.000000000 +0100
@@ -0,0 +1,38 @@
+#ifndef DM_STATS_H
+#define DM_STATS_H
+
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/rcupdate.h>
+#include <linux/genhd.h>
+
+int dm_stats_init(void);
+void dm_stats_exit(void);
+
+/*
+ * Per-device statistics state: an id-ordered list of struct dm_stat
+ * regions, modified under the mutex and walked under RCU on the i/o
+ * path.
+ */
+struct dm_stats {
+ struct mutex mutex;
+ struct list_head list; /* list of struct dm_stat */
+};
+
+void dm_stats_init_device(struct dm_stats *st);
+void dm_stats_exit_device(struct dm_stats *st);
+
+int dm_stats_create(struct dm_stats *st, sector_t start, sector_t end, sector_t step);
+int dm_stats_delete(struct dm_stats *st, int id);
+
+void dm_stats_bio(struct dm_stats *st, struct bio *bio, bool end,
+ unsigned long duration);
+
+struct dm_message_output_callback;
+
+int dm_stats_print(struct dm_stats *st, int id, bool clear,
+ struct dm_message_output_callback *c,
+ int (*callback)(struct dm_message_output_callback *, const char *));
+
+/*
+ * Cheap check for the i/o path: true if any statistics regions exist.
+ */
+static inline bool dm_stats_used(struct dm_stats *st)
+{
+ return !list_empty(&st->list);
+}
+
+#endif
Index: linux-3.8-rc4-fast/drivers/md/dm.c
===================================================================
--- linux-3.8-rc4-fast.orig/drivers/md/dm.c 2013-01-24 20:55:19.000000000 +0100
+++ linux-3.8-rc4-fast/drivers/md/dm.c 2013-01-24 20:55:55.000000000 +0100
@@ -176,6 +176,8 @@ struct mapped_device {
struct bio_set *bs;
+ struct dm_stats stats;
+
/*
* Event handling.
*/
@@ -284,6 +286,7 @@ static int (*_inits[])(void) __initdata
dm_io_init,
dm_kcopyd_init,
dm_interface_init,
+ dm_stats_init,
};
static void (*_exits[])(void) = {
@@ -294,6 +297,7 @@ static void (*_exits[])(void) = {
dm_io_exit,
dm_kcopyd_exit,
dm_interface_exit,
+ dm_stats_exit,
};
static int __init dm_init(void)
@@ -402,6 +406,16 @@ int dm_lock_for_deletion(struct mapped_d
return r;
}
+/*
+ * Device capacity in 512-byte sectors.
+ */
+sector_t dm_get_size(struct mapped_device *md)
+{
+ return get_capacity(md->disk);
+}
+
+/*
+ * Accessor for the device's statistics state.
+ */
+struct dm_stats *dm_get_stats(struct mapped_device *md)
+{
+ return &md->stats;
+}
+
static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
{
struct mapped_device *md = bdev->bd_disk->private_data;
@@ -486,6 +500,9 @@ static void start_io_acct(struct dm_io *
part_stat_unlock();
atomic_set(&dm_disk(md)->part0.in_flight[rw],
atomic_inc_return(&md->pending[rw]));
+
+ if (unlikely(dm_stats_used(&md->stats)))
+ dm_stats_bio(&md->stats, io->bio, false, 0);
}
static void end_io_acct(struct dm_io *io)
@@ -501,6 +518,9 @@ static void end_io_acct(struct dm_io *io
part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration);
part_stat_unlock();
+ if (unlikely(dm_stats_used(&md->stats)))
+ dm_stats_bio(&md->stats, bio, true, duration);
+
/*
* After this is decremented the bio must not be touched if it is
* a flush.
@@ -1479,7 +1499,7 @@ static void _dm_request(struct request_q
return;
}
-static int dm_request_based(struct mapped_device *md)
+int dm_request_based(struct mapped_device *md)
{
return blk_queue_stackable(md->queue);
}
@@ -1944,6 +1964,8 @@ static struct mapped_device *alloc_dev(i
md->flush_bio.bi_bdev = md->bdev;
md->flush_bio.bi_rw = WRITE_FLUSH;
+ dm_stats_init_device(&md->stats);
+
/* Populate the mapping, nobody knows we exist yet */
spin_lock(&_minor_lock);
old_md = idr_replace(&_minor_idr, md, minor);
@@ -1997,6 +2019,7 @@ static void free_dev(struct mapped_devic
put_disk(md->disk);
blk_cleanup_queue(md->queue);
+ dm_stats_exit_device(&md->stats);
module_put(THIS_MODULE);
kfree(md);
}
@@ -2671,6 +2694,29 @@ out:
return r;
}
+/*
+ * Internal suspend, used to quiesce the device while creating
+ * statistics regions. Takes md->suspend_lock and deliberately LEAVES
+ * IT HELD; it must be paired with dm_internal_resume, which releases
+ * it. If the device is already suspended by the user, only the lock is
+ * taken.
+ */
+void dm_internal_suspend(struct mapped_device *md)
+{
+ mutex_lock(&md->suspend_lock);
+ if (dm_suspended_md(md))
+ return;
+
+ set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
+ synchronize_srcu(&md->io_barrier);
+ flush_workqueue(md->wq);
+ dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
+}
+
+/*
+ * Undo dm_internal_suspend and release md->suspend_lock. If the device
+ * was suspended by the user, only the lock is dropped.
+ */
+void dm_internal_resume(struct mapped_device *md)
+{
+ if (dm_suspended_md(md))
+ goto done;
+
+ dm_queue_flush(md);
+
+done:
+ mutex_unlock(&md->suspend_lock);
+}
+
/*-----------------------------------------------------------------
* Event notification.
*---------------------------------------------------------------*/
Index: linux-3.8-rc4-fast/drivers/md/dm.h
===================================================================
--- linux-3.8-rc4-fast.orig/drivers/md/dm.h 2013-01-24 20:55:19.000000000 +0100
+++ linux-3.8-rc4-fast/drivers/md/dm.h 2013-01-24 20:55:55.000000000 +0100
@@ -16,6 +16,8 @@
#include <linux/blkdev.h>
#include <linux/hdreg.h>
+#include "dm-stats.h"
+
/*
* Suspend feature flags
*/
@@ -146,10 +148,16 @@ void dm_destroy(struct mapped_device *md
void dm_destroy_immediate(struct mapped_device *md);
int dm_open_count(struct mapped_device *md);
int dm_lock_for_deletion(struct mapped_device *md);
+int dm_request_based(struct mapped_device *md);
+sector_t dm_get_size(struct mapped_device *md);
+struct dm_stats *dm_get_stats(struct mapped_device *md);
int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
unsigned cookie);
+void dm_internal_suspend(struct mapped_device *md);
+void dm_internal_resume(struct mapped_device *md);
+
int dm_io_init(void);
void dm_io_exit(void);
Index: linux-3.8-rc4-fast/Documentation/device-mapper/dm-statistics.txt
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-3.8-rc4-fast/Documentation/device-mapper/dm-statistics.txt 2013-01-24 20:59:17.000000000 +0100
@@ -0,0 +1,44 @@
+dm statistics
+
+Device mapper can calculate I/O statistics on various regions of
+the device.
+
+Each region specifies a starting sector, ending sector and step.
+Individual statistics will be collected for each step-sized area
+between starting and ending sector.
+
+Each region is identified by a region id: an integer that is
+uniquely assigned when the region is created. The region id must be
+supplied when querying statistics about the region or deleting the
+region. Unique region ids enable multiple userspace programs to
+request and process statistics without stepping over each other's data.
+
+A new region is created with the following message:
+dmsetup message <device> 0 @stats_create <range> <step>
+ range is
+ "-" - whole device
+ "<start>-<end>" - a specified range in 512-byte sectors
+ step is
+ "<number>" - the number of sectors in each area
+ "/<number>" - the range is subdivided into the specified
+ number of areas
+The message returns the region id.
+
+Statistics can be queried with the following message:
+dmsetup message <device> 0 @stats_print <id>
+This message returns statistics, each area is represented by one line in
+this form:
+<starting sector>-<ending sector> counters
+Counters have the same meaning as /sys/block/*/stat or /proc/diskstats
+The counter of merged requests is always zero because merging has no
+meaning in device mapper.
+
+The message
+dmsetup message <device> 0 @stats_print_clear <id>
+prints the counters and clears them (except in-flight counter, it
+reflects the current number of in-flight requests and it is not
+cleared).
+
+The message
+dmsetup message <device> 0 @stats_delete <id>
+deletes the region with the specified id.
More information about the dm-devel
mailing list