[Date Prev][Date Next] [Thread Prev][Thread Next]
[Thread Index]
[Date Index]
[Author Index]
[dm-devel] Re: [PATCH] io-controller: do some changes of io.policy interface
- From: Vivek Goyal <vgoyal redhat com>
- To: Gui Jianfeng <guijianfeng cn fujitsu com>
- Cc: dhaval linux vnet ibm com, snitzer redhat com, peterz infradead org, dm-devel redhat com, dpshah google com, jens axboe oracle com, agk redhat com, balbir linux vnet ibm com, paolo valente unimore it, fernando oss ntt co jp, mikew google com, jmoyer redhat com, nauman google com, m-ikeda ds jp nec com, lizf cn fujitsu com, fchecconi gmail com, Paul Menage <menage google com>, akpm linux-foundation org, jbaron redhat com, linux-kernel vger kernel org, s-uchida ap jp nec com, righi andrea gmail com, containers lists linux-foundation org
- Subject: [dm-devel] Re: [PATCH] io-controller: do some changes of io.policy interface
- Date: Thu, 25 Jun 2009 08:55:13 -0400
On Thu, Jun 25, 2009 at 06:23:52PM +0800, Gui Jianfeng wrote:
> Paul Menage wrote:
> > On Fri, Jun 19, 2009 at 1:37 PM, Vivek Goyal<vgoyal redhat com> wrote:
> >> You can use the following format to play with the new interface.
> >> #echo DEV:weight:ioprio_class > /patch/to/cgroup/policy
> >> weight=0 means removing the policy for DEV.
> >>
> >> Examples:
> >> Configure weight=300 ioprio_class=2 on /dev/hdb in this cgroup
> >> # echo /dev/hdb:300:2 > io.policy
> >> # cat io.policy
> >> dev weight class
> >> /dev/hdb 300 2
> >
> > I think that the read and write should be consistent. Can you just use
> > white-space separation for both, rather than colon-separation for
> > writes and white-space separation for reads?
> >
> > Also, storing device inode paths statically as strings into the
> > io_policy structure seems wrong, since it's quite possible for the
> > device node that was used originally to be gone by the time that
> > someone reads the io.policy file, or renamed, or even replaced with an
> > inode that refers to to a different block device
> >
> > My preferred alternatives would be:
> >
> > - read/write the value as a device number rather than a name
> > - read/write the block device's actual name (e.g. hda or sda) rather
> > than a path to the inode
> >
>
> Hi Paul, Vivek
>
> Here is a patch to fix the issue Paul raised.
>
> This patch achives the following goals
> 1 According to Paul's comment, Modifing io.policy interface to
> use device number for read/write directly.
> 2 Just use white-space separation for both, rather than colon-
> separation for writes and white-space separation for reads.
> 3 Do more strict checking for inputting.
>
> old interface:
> Configure weight=300 ioprio_class=2 on /dev/hdb in this cgroup
> # echo "/dev/hdb:300:2" > io.policy
> # cat io.policy
> dev weight class
> /dev/hdb 300 2
>
> new interface:
> Configure weight=300 ioprio_class=2 on /dev/hdb in this cgroup
> # echo "3:64 300 2" > io.policy
> # cat io.policy
> dev weight class
> 3:64 300 2
>
> Signed-off-by: Gui Jianfeng <guijianfeng cn fujitsu com>
> ---
> block/elevator-fq.c | 59 ++++++++++++++++++++++++++++++++++----------------
> block/elevator-fq.h | 1 -
> 2 files changed, 40 insertions(+), 20 deletions(-)
>
> diff --git a/block/elevator-fq.c b/block/elevator-fq.c
> index d779282..83c831b 100644
> --- a/block/elevator-fq.c
> +++ b/block/elevator-fq.c
> @@ -1895,12 +1895,12 @@ static int io_cgroup_policy_read(struct cgroup *cgrp, struct cftype *cft,
> if (list_empty(&iocg->policy_list))
> goto out;
>
> - seq_printf(m, "dev weight class\n");
> + seq_printf(m, "dev\tweight\tclass\n");
>
> spin_lock_irq(&iocg->lock);
> list_for_each_entry(pn, &iocg->policy_list, node) {
> - seq_printf(m, "%s %lu %lu\n", pn->dev_name,
> - pn->weight, pn->ioprio_class);
> + seq_printf(m, "%u:%u\t%lu\t%lu\n", MAJOR(pn->dev),
> + MINOR(pn->dev), pn->weight, pn->ioprio_class);
> }
> spin_unlock_irq(&iocg->lock);
> out:
> @@ -1936,44 +1936,65 @@ static struct io_policy_node *policy_search_node(const struct io_cgroup *iocg,
> return NULL;
> }
>
> -static int devname_to_devnum(const char *buf, dev_t *dev)
> +static int check_dev_num(dev_t dev)
> {
> - struct block_device *bdev;
> + int part = 0;
> struct gendisk *disk;
> - int part;
>
> - bdev = lookup_bdev(buf);
> - if (IS_ERR(bdev))
> + disk = get_gendisk(dev, &part);
> + if (!disk || part)
> return -ENODEV;
>
> - disk = get_gendisk(bdev->bd_dev, &part);
> - if (part)
> - return -EINVAL;
> -
> - *dev = MKDEV(disk->major, disk->first_minor);
> - bdput(bdev);
> -
> return 0;
> }
>
> static int policy_parse_and_set(char *buf, struct io_policy_node *newpn)
> {
> - char *s[3], *p;
> + char *s[4], *p, *major_s = NULL, *minor_s = NULL;
> int ret;
> + unsigned long major, minor;
> int i = 0;
> + dev_t dev;
>
> memset(s, 0, sizeof(s));
> - while ((p = strsep(&buf, ":")) != NULL) {
> + while ((p = strsep(&buf, " ")) != NULL) {
> if (!*p)
> continue;
> s[i++] = p;
> +
> + /* Prevent from inputing too many things */
> + if (i == 4)
> + break;
> }
>
> - ret = devname_to_devnum(s[0], &newpn->dev);
> + if (i != 3)
> + return -EINVAL;
> +
> + p = strsep(&s[0], ":");
> + if (p != NULL)
> + major_s = p;
> + else
> + return -EINVAL;
> +
> + minor_s = s[0];
> + if (!minor_s)
> + return -EINVAL;
> +
> + ret = strict_strtoul(major_s, 10, &major);
> + if (ret)
> + return -EINVAL;
> +
> + ret = strict_strtoul(minor_s, 10, &minor);
> + if (ret)
> + return -EINVAL;
> +
> + dev = MKDEV(major, minor);
> +
> + ret = check_dev_num(dev);
> if (ret)
> return ret;
>
> - strcpy(newpn->dev_name, s[0]);
> + newpn->dev = dev;
>
> if (s[1] == NULL)
> return -EINVAL;
> diff --git a/block/elevator-fq.h b/block/elevator-fq.h
> index b3193f8..7722ebe 100644
> --- a/block/elevator-fq.h
> +++ b/block/elevator-fq.h
> @@ -286,7 +286,6 @@ struct io_group {
>
> struct io_policy_node {
> struct list_head node;
> - char dev_name[32];
> dev_t dev;
> unsigned long weight;
> unsigned long ioprio_class;
Hi Gui,
Thanks for the patch. "unsigned long" for ioprio_class is too big. How
about using "unsigned short"? I noticed that in io_cgroup also we are
using "unsigned long". I will fix that.
For storing weight now we are planning to use "unsigned int". Can you
please switch to that.
Thanks
Vivek
[Date Prev][Date Next] [Thread Prev][Thread Next]
[Thread Index]
[Date Index]
[Author Index]