From patchwork Thu Jul 2 20:01:39 2009 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Vivek Goyal X-Patchwork-Id: 33782 Received: from hormel.redhat.com (hormel1.redhat.com [209.132.177.33]) by demeter.kernel.org (8.14.2/8.14.2) with ESMTP id n62K2VKf030864 for ; Thu, 2 Jul 2009 20:02:31 GMT Received: from listman.util.phx.redhat.com (listman.util.phx.redhat.com [10.8.4.110]) by hormel.redhat.com (Postfix) with ESMTP id CF78761AB91; Thu, 2 Jul 2009 16:02:30 -0400 (EDT) Received: from int-mx2.corp.redhat.com ([172.16.27.26]) by listman.util.phx.redhat.com (8.13.1/8.13.1) with ESMTP id n62K29J4030266 for ; Thu, 2 Jul 2009 16:02:09 -0400 Received: from ns3.rdu.redhat.com (ns3.rdu.redhat.com [10.11.255.199]) by int-mx2.corp.redhat.com (8.13.1/8.13.1) with ESMTP id n62K238V024096; Thu, 2 Jul 2009 16:02:03 -0400 Received: from machine.usersys.redhat.com (dhcp-100-19-148.bos.redhat.com [10.16.19.148]) by ns3.rdu.redhat.com (8.13.8/8.13.8) with ESMTP id n62K22bR014916; Thu, 2 Jul 2009 16:02:02 -0400 Received: by machine.usersys.redhat.com (Postfix, from userid 10451) id A84AF267FE; Thu, 2 Jul 2009 16:01:57 -0400 (EDT) From: Vivek Goyal To: linux-kernel@vger.kernel.org, containers@lists.linux-foundation.org, dm-devel@redhat.com, jens.axboe@oracle.com, nauman@google.com, dpshah@google.com, lizf@cn.fujitsu.com, mikew@google.com, fchecconi@gmail.com, paolo.valente@unimore.it, ryov@valinux.co.jp, fernando@oss.ntt.co.jp, s-uchida@ap.jp.nec.com, taka@valinux.co.jp, guijianfeng@cn.fujitsu.com, jmoyer@redhat.com, dhaval@linux.vnet.ibm.com, balbir@linux.vnet.ibm.com, righi.andrea@gmail.com, m-ikeda@ds.jp.nec.com, jbaron@redhat.com Date: Thu, 2 Jul 2009 16:01:39 -0400 Message-Id: <1246564917-19603-8-git-send-email-vgoyal@redhat.com> In-Reply-To: <1246564917-19603-1-git-send-email-vgoyal@redhat.com> References: <1246564917-19603-1-git-send-email-vgoyal@redhat.com> X-Scanned-By: MIMEDefang 2.58 on 172.16.27.26 X-loop: dm-devel@redhat.com Cc: peterz@infradead.org, akpm@linux-foundation.org, snitzer@redhat.com, agk@redhat.com, vgoyal@redhat.com Subject: [dm-devel] [PATCH 07/25] io-controller: core bfq scheduler changes for hierarchical setup X-BeenThere: dm-devel@redhat.com X-Mailman-Version: 2.1.5 Precedence: junk Reply-To: device-mapper development List-Id: device-mapper development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: dm-devel-bounces@redhat.com Errors-To: dm-devel-bounces@redhat.com o Some of the core bfq scheduler changes for hiearchical groups. Signed-off-by: Fabio Checconi Signed-off-by: Paolo Valente Signed-off-by: Nauman Rafique Signed-off-by: Vivek Goyal --- block/elevator-fq.c | 169 ++++++++++++++++++++++++++++++++++++++++++++++----- block/elevator-fq.h | 4 + init/Kconfig | 8 +++ 3 files changed, 165 insertions(+), 16 deletions(-) diff --git a/block/elevator-fq.c b/block/elevator-fq.c index 67c02b9..0acfa2c 100644 --- a/block/elevator-fq.c +++ b/block/elevator-fq.c @@ -42,6 +42,69 @@ static int elv_rate_sampling_window = HZ / 10; */ #define WFQ_SERVICE_SHIFT 22 +#ifdef CONFIG_GROUP_IOSCHED +#define for_each_entity(entity) \ + for (; entity != NULL; entity = entity->parent) + +#define for_each_entity_safe(entity, parent) \ + for (; entity && ({ parent = entity->parent; 1; }); entity = parent) + + +static struct io_entity *bfq_lookup_next_entity(struct io_sched_data *sd, + int extract); + +static int bfq_update_next_active(struct io_sched_data *sd) +{ + struct io_group *iog; + struct io_entity *entity, *next_active; + + if (sd->active_entity != NULL) + /* will update/requeue at the end of service */ + return 0; + + /* + * NOTE: this can be improved in may ways, such as returning + * 1 (and thus propagating upwards the update) only when the + * budget changes, or caching the bfqq that will be scheduled + * next from this subtree. By now we worry more about + * correctness than about performance... + */ + next_active = bfq_lookup_next_entity(sd, 0); + sd->next_active = next_active; + + if (next_active != NULL) { + iog = container_of(sd, struct io_group, sched_data); + entity = iog->my_entity; + if (entity != NULL) + entity->budget = next_active->budget; + } + + return 1; +} + +static inline void bfq_check_next_active(struct io_sched_data *sd, + struct io_entity *entity) +{ + BUG_ON(sd->next_active != entity); +} +#else /* GROUP_IOSCHED */ +#define for_each_entity(entity) \ + for (; entity != NULL; entity = NULL) + +#define for_each_entity_safe(entity, parent) \ + for (parent = NULL; entity != NULL; entity = parent) + +static inline int bfq_update_next_active(struct io_sched_data *sd) +{ + return 0; +} + +static inline void bfq_check_next_active(struct io_sched_data *sd, + struct io_entity *entity) +{ +} +#endif /* GROUP_IOSCHED */ + static inline int elv_prio_slice(struct elv_fq_data *efqd, int sync, unsigned short prio) { @@ -587,8 +650,10 @@ static struct io_entity *bfq_lookup_next_entity(struct io_sched_data *sd, entity = __bfq_lookup_next_entity(st); if (entity != NULL) { if (extract) { + bfq_check_next_active(sd, entity); bfq_active_remove(st, entity); sd->active_entity = entity; + sd->next_active = NULL; } break; } @@ -661,11 +726,8 @@ static void __bfq_activate_entity(struct io_entity *entity, int add_front) if (add_front) { struct io_entity *next_entity; - /* - * Determine the entity which will be dispatched next - * Use sd->next_active once hierarchical patch is applied - */ - next_entity = bfq_lookup_next_entity(sd, 0); + /* Determine the entity which will be dispatched next */ + next_entity = sd->next_active; if (next_entity && next_entity != entity) { struct io_service_tree *new_st; @@ -697,7 +759,21 @@ static void __bfq_activate_entity(struct io_entity *entity, int add_front) */ static void bfq_activate_entity(struct io_entity *entity, int add_front) { - __bfq_activate_entity(entity, add_front); + struct io_sched_data *sd; + + for_each_entity(entity) { + __bfq_activate_entity(entity, add_front); + + add_front = 0; + sd = entity->sched_data; + if (!bfq_update_next_active(sd)) + /* + * No need to propagate the activation to the + * upper entities, as they will be updated when + * the active entity is rescheduled. + */ + break; + } } /** @@ -732,6 +808,8 @@ static int __bfq_deactivate_entity(struct io_entity *entity, int requeue) bfq_idle_remove(st, entity); else if (entity->tree != NULL) BUG(); + if (was_active || sd->next_active == entity) + ret = bfq_update_next_active(sd); if (!requeue || !bfq_gt(entity->finish, st->vtime)) bfq_forget_entity(st, entity); @@ -739,6 +817,7 @@ static int __bfq_deactivate_entity(struct io_entity *entity, int requeue) bfq_idle_insert(st, entity); BUG_ON(sd->active_entity == entity); + BUG_ON(sd->next_active == entity); return ret; } @@ -750,18 +829,62 @@ static int __bfq_deactivate_entity(struct io_entity *entity, int requeue) */ static void bfq_deactivate_entity(struct io_entity *entity, int requeue) { - __bfq_deactivate_entity(entity, requeue); + struct io_sched_data *sd; + struct io_entity *parent; + + for_each_entity_safe(entity, parent) { + sd = entity->sched_data; + + if (!__bfq_deactivate_entity(entity, requeue)) + /* + * The parent entity is still backlogged, and + * we don't need to update it as it is still + * under service. + */ + break; + + if (sd->next_active != NULL) { + /* + * The parent entity is still backlogged and + * the budgets on the path towards the root + * need to be updated. + */ + goto update; + } + + /* + * If we reach there the parent is no more backlogged and + * we want to propagate the dequeue upwards. + * + */ + + requeue = 1; + } + + return; + +update: + entity = parent; + for_each_entity(entity) { + __bfq_activate_entity(entity, 0); + + sd = entity->sched_data; + if (!bfq_update_next_active(sd)) + break; + } } static void entity_served(struct io_entity *entity, unsigned long served) { struct io_service_tree *st; - st = io_entity_service_tree(entity); - entity->service += served; - BUG_ON(st->wsum == 0); - st->vtime += bfq_delta(served, st->wsum); - bfq_forget_idle(st); + for_each_entity(entity) { + st = io_entity_service_tree(entity); + entity->service += served; + BUG_ON(st->wsum == 0); + st->vtime += bfq_delta(served, st->wsum); + bfq_forget_idle(st); + } } /** @@ -1154,11 +1277,25 @@ static struct io_queue *elv_get_next_ioq(struct request_queue *q, int extract) return NULL; sd = &efqd->root_group->sched_data; - entity = bfq_lookup_next_entity(sd, 1); - BUG_ON(!entity); - if (extract) - entity->service = 0; + for (; sd != NULL; sd = entity->my_sched_data) { + entity = bfq_lookup_next_entity(sd, 1); + /* + * entity can be null despite the fact that there are busy + * queues. if all the busy queues are under a group which is + * currently under service. + * So if we are just looking for next ioq while something is + * being served, null entity is not an error. + */ + BUG_ON(!entity && extract); + + if (extract) + entity->service = 0; + + if (!entity) + return NULL; + } + ioq = io_entity_to_ioq(entity); return ioq; diff --git a/block/elevator-fq.h b/block/elevator-fq.h index 4b69239..57207c4 100644 --- a/block/elevator-fq.h +++ b/block/elevator-fq.h @@ -72,6 +72,7 @@ struct io_service_tree { */ struct io_sched_data { struct io_entity *active_entity; + struct io_entity *next_active; struct io_service_tree service_tree[IO_IOPRIO_CLASSES]; }; @@ -181,7 +182,10 @@ struct io_queue { }; struct io_group { + struct io_entity entity; struct io_sched_data sched_data; + struct io_entity *my_entity; + /* * async queue for each priority case for RT and BE class. * Used only for cfq. diff --git a/init/Kconfig b/init/Kconfig index 1ce05a4..a380f46 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -612,6 +612,14 @@ config CGROUP_MEM_RES_CTLR_SWAP Now, memory usage of swap_cgroup is 2 bytes per entry. If swap page size is 4096bytes, 512k per 1Gbytes of swap. +config GROUP_IOSCHED + bool "Group IO Scheduler" + depends on CGROUPS && ELV_FAIR_QUEUING + default n + ---help--- + This feature lets IO scheduler recognize task groups and control + disk bandwidth allocation to such task groups. + endif # CGROUPS config MM_OWNER