[Date Prev][Date Next] [Thread Prev][Thread Next]
[Thread Index]
[Date Index]
[Author Index]
[dm-devel] [PATCH 07/25] io-controller: core bfq scheduler changes for hierarchical setup
- From: Vivek Goyal <vgoyal redhat com>
- To: linux-kernel vger kernel org, containers lists linux-foundation org, dm-devel redhat com, jens axboe oracle com, nauman google com, dpshah google com, lizf cn fujitsu com, mikew google com, fchecconi gmail com, paolo valente unimore it, ryov valinux co jp, fernando oss ntt co jp, s-uchida ap jp nec com, taka valinux co jp, guijianfeng cn fujitsu com, jmoyer redhat com, dhaval linux vnet ibm com, balbir linux vnet ibm com, righi andrea gmail com, m-ikeda ds jp nec com, jbaron redhat com
- Cc: peterz infradead org, akpm linux-foundation org, snitzer redhat com, agk redhat com, vgoyal redhat com
- Subject: [dm-devel] [PATCH 07/25] io-controller: core bfq scheduler changes for hierarchical setup
- Date: Thu, 2 Jul 2009 16:01:39 -0400
o Some of the core bfq scheduler changes for hierarchical groups.
Signed-off-by: Fabio Checconi <fabio gandalf sssup it>
Signed-off-by: Paolo Valente <paolo valente unimore it>
Signed-off-by: Nauman Rafique <nauman google com>
Signed-off-by: Vivek Goyal <vgoyal redhat com>
---
block/elevator-fq.c | 169 ++++++++++++++++++++++++++++++++++++++++++++++-----
block/elevator-fq.h | 4 +
init/Kconfig | 8 +++
3 files changed, 165 insertions(+), 16 deletions(-)
diff --git a/block/elevator-fq.c b/block/elevator-fq.c
index 67c02b9..0acfa2c 100644
--- a/block/elevator-fq.c
+++ b/block/elevator-fq.c
@@ -42,6 +42,69 @@ static int elv_rate_sampling_window = HZ / 10;
*/
#define WFQ_SERVICE_SHIFT 22
+#ifdef CONFIG_GROUP_IOSCHED
+#define for_each_entity(entity) \
+ for (; entity != NULL; entity = entity->parent)
+
+#define for_each_entity_safe(entity, parent) \
+ for (; entity && ({ parent = entity->parent; 1; }); entity = parent)
+
+
+static struct io_entity *bfq_lookup_next_entity(struct io_sched_data *sd,
+ int extract);
+
+static int bfq_update_next_active(struct io_sched_data *sd)
+{
+ struct io_group *iog;
+ struct io_entity *entity, *next_active;
+
+ if (sd->active_entity != NULL)
+ /* will update/requeue at the end of service */
+ return 0;
+
+ /*
+ * NOTE: this can be improved in many ways, such as returning
+ * 1 (and thus propagating upwards the update) only when the
+ * budget changes, or caching the bfqq that will be scheduled
+ * next from this subtree. By now we worry more about
+ * correctness than about performance...
+ */
+ next_active = bfq_lookup_next_entity(sd, 0);
+ sd->next_active = next_active;
+
+ if (next_active != NULL) {
+ iog = container_of(sd, struct io_group, sched_data);
+ entity = iog->my_entity;
+ if (entity != NULL)
+ entity->budget = next_active->budget;
+ }
+
+ return 1;
+}
+
+static inline void bfq_check_next_active(struct io_sched_data *sd,
+ struct io_entity *entity)
+{
+ BUG_ON(sd->next_active != entity);
+}
+#else /* GROUP_IOSCHED */
+#define for_each_entity(entity) \
+ for (; entity != NULL; entity = NULL)
+
+#define for_each_entity_safe(entity, parent) \
+ for (parent = NULL; entity != NULL; entity = parent)
+
+static inline int bfq_update_next_active(struct io_sched_data *sd)
+{
+ return 0;
+}
+
+static inline void bfq_check_next_active(struct io_sched_data *sd,
+ struct io_entity *entity)
+{
+}
+#endif /* GROUP_IOSCHED */
+
static inline int elv_prio_slice(struct elv_fq_data *efqd, int sync,
unsigned short prio)
{
@@ -587,8 +650,10 @@ static struct io_entity *bfq_lookup_next_entity(struct io_sched_data *sd,
entity = __bfq_lookup_next_entity(st);
if (entity != NULL) {
if (extract) {
+ bfq_check_next_active(sd, entity);
bfq_active_remove(st, entity);
sd->active_entity = entity;
+ sd->next_active = NULL;
}
break;
}
@@ -661,11 +726,8 @@ static void __bfq_activate_entity(struct io_entity *entity, int add_front)
if (add_front) {
struct io_entity *next_entity;
- /*
- * Determine the entity which will be dispatched next
- * Use sd->next_active once hierarchical patch is applied
- */
- next_entity = bfq_lookup_next_entity(sd, 0);
+ /* Determine the entity which will be dispatched next */
+ next_entity = sd->next_active;
if (next_entity && next_entity != entity) {
struct io_service_tree *new_st;
@@ -697,7 +759,21 @@ static void __bfq_activate_entity(struct io_entity *entity, int add_front)
*/
static void bfq_activate_entity(struct io_entity *entity, int add_front)
{
- __bfq_activate_entity(entity, add_front);
+ struct io_sched_data *sd;
+
+ for_each_entity(entity) {
+ __bfq_activate_entity(entity, add_front);
+
+ add_front = 0;
+ sd = entity->sched_data;
+ if (!bfq_update_next_active(sd))
+ /*
+ * No need to propagate the activation to the
+ * upper entities, as they will be updated when
+ * the active entity is rescheduled.
+ */
+ break;
+ }
}
/**
@@ -732,6 +808,8 @@ static int __bfq_deactivate_entity(struct io_entity *entity, int requeue)
bfq_idle_remove(st, entity);
else if (entity->tree != NULL)
BUG();
+ if (was_active || sd->next_active == entity)
+ ret = bfq_update_next_active(sd);
if (!requeue || !bfq_gt(entity->finish, st->vtime))
bfq_forget_entity(st, entity);
@@ -739,6 +817,7 @@ static int __bfq_deactivate_entity(struct io_entity *entity, int requeue)
bfq_idle_insert(st, entity);
BUG_ON(sd->active_entity == entity);
+ BUG_ON(sd->next_active == entity);
return ret;
}
@@ -750,18 +829,62 @@ static int __bfq_deactivate_entity(struct io_entity *entity, int requeue)
*/
static void bfq_deactivate_entity(struct io_entity *entity, int requeue)
{
- __bfq_deactivate_entity(entity, requeue);
+ struct io_sched_data *sd;
+ struct io_entity *parent;
+
+ for_each_entity_safe(entity, parent) {
+ sd = entity->sched_data;
+
+ if (!__bfq_deactivate_entity(entity, requeue))
+ /*
+ * The parent entity is still backlogged, and
+ * we don't need to update it as it is still
+ * under service.
+ */
+ break;
+
+ if (sd->next_active != NULL) {
+ /*
+ * The parent entity is still backlogged and
+ * the budgets on the path towards the root
+ * need to be updated.
+ */
+ goto update;
+ }
+
+ /*
+ * If we reach here the parent is no longer backlogged and
+ * we want to propagate the dequeue upwards.
+ *
+ */
+
+ requeue = 1;
+ }
+
+ return;
+
+update:
+ entity = parent;
+ for_each_entity(entity) {
+ __bfq_activate_entity(entity, 0);
+
+ sd = entity->sched_data;
+ if (!bfq_update_next_active(sd))
+ break;
+ }
}
static void entity_served(struct io_entity *entity, unsigned long served)
{
struct io_service_tree *st;
- st = io_entity_service_tree(entity);
- entity->service += served;
- BUG_ON(st->wsum == 0);
- st->vtime += bfq_delta(served, st->wsum);
- bfq_forget_idle(st);
+ for_each_entity(entity) {
+ st = io_entity_service_tree(entity);
+ entity->service += served;
+ BUG_ON(st->wsum == 0);
+ st->vtime += bfq_delta(served, st->wsum);
+ bfq_forget_idle(st);
+ }
}
/**
@@ -1154,11 +1277,25 @@ static struct io_queue *elv_get_next_ioq(struct request_queue *q, int extract)
return NULL;
sd = &efqd->root_group->sched_data;
- entity = bfq_lookup_next_entity(sd, 1);
- BUG_ON(!entity);
- if (extract)
- entity->service = 0;
+ for (; sd != NULL; sd = entity->my_sched_data) {
+ entity = bfq_lookup_next_entity(sd, 1);
+ /*
+ * entity can be null despite the fact that there are busy
+ * queues, if all the busy queues are under a group which is
+ * currently under service.
+ * So if we are just looking for next ioq while something is
+ * being served, null entity is not an error.
+ */
+ BUG_ON(!entity && extract);
+
+ if (extract)
+ entity->service = 0;
+
+ if (!entity)
+ return NULL;
+ }
+
ioq = io_entity_to_ioq(entity);
return ioq;
diff --git a/block/elevator-fq.h b/block/elevator-fq.h
index 4b69239..57207c4 100644
--- a/block/elevator-fq.h
+++ b/block/elevator-fq.h
@@ -72,6 +72,7 @@ struct io_service_tree {
*/
struct io_sched_data {
struct io_entity *active_entity;
+ struct io_entity *next_active;
struct io_service_tree service_tree[IO_IOPRIO_CLASSES];
};
@@ -181,7 +182,10 @@ struct io_queue {
};
struct io_group {
+ struct io_entity entity;
struct io_sched_data sched_data;
+ struct io_entity *my_entity;
+
/*
* async queue for each priority case for RT and BE class.
* Used only for cfq.
diff --git a/init/Kconfig b/init/Kconfig
index 1ce05a4..a380f46 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -612,6 +612,14 @@ config CGROUP_MEM_RES_CTLR_SWAP
Now, memory usage of swap_cgroup is 2 bytes per entry. If swap page
size is 4096bytes, 512k per 1Gbytes of swap.
+config GROUP_IOSCHED
+ bool "Group IO Scheduler"
+ depends on CGROUPS && ELV_FAIR_QUEUING
+ default n
+ ---help---
+ This feature lets IO scheduler recognize task groups and control
+ disk bandwidth allocation to such task groups.
+
endif # CGROUPS
config MM_OWNER
--
1.6.0.6
[Date Prev][Date Next] [Thread Prev][Thread Next]
[Thread Index]
[Date Index]
[Author Index]