diff mbox

[v7,for,Xen,4.7,1/4] xen: enable per-VCPU parameter settings for RTDS scheduler

Message ID 1458146871-2813-2-git-send-email-lichong659@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Chong Li March 16, 2016, 4:47 p.m. UTC
Add XEN_DOMCTL_SCHEDOP_getvcpuinfo and _putvcpuinfo hypercalls
to independently get and set the scheduling parameters of each
vCPU of a domain

Signed-off-by: Chong Li <chong.li@wustl.edu>
Signed-off-by: Meng Xu <mengxu@cis.upenn.edu>
Signed-off-by: Sisu Xi <xisisu@gmail.com>

---
Changes on PATCH v6:
1) Add explain for nr_vcpus in struct xen_domctl_scheduler_op, because it is used
in both IN and OUT ways.
2) Remove the check and warning for vcpu settings with low budget or budget. Making this
feature "per-domain" or "per-operation" is one of the future work.
3) In the *cntl() functions in sched_credit.c and sched_credit2.c, change the "if-else"
structure to "switch" structure.
4) In rt_dom_cntl(), use copy_to_guest* instead of __copy_to_guest*, because the latter one
requires lock protection.

Changes on PATCH v5:
1) When processing XEN_DOMCTL_SCHEDOP_get/putvcpuinfo, we do
preemption check in a similar way to XEN_SYSCTL_pcitopoinfo

Changes on PATCH v4:
1) Add uint32_t vcpu_index to struct xen_domctl_scheduler_op.
When processing XEN_DOMCTL_SCHEDOP_get/putvcpuinfo, we call
hypercall_preemption_check in case the current hypercall lasts
too long. If we decide to preempt the current hypercall, we record
the index of the most-recent finished vcpu into the vcpu_index of
struct xen_domctl_scheduler_op. So when we resume the hypercall after
preemption, we start processing from the posion specified by vcpu_index,
and don't need to repeat the work that has already been done in the
hypercall before the preemption.
(This design is based on the do_grant_table_op() in grant_table.c)

2) Coding style changes

Changes on PATCH v3:
1) Remove struct xen_domctl_schedparam_t.

2) Change struct xen_domctl_scheduler_op.

3) Check if period/budget is within a validated range

Changes on PATCH v2:
1) Change struct xen_domctl_scheduler_op, for transferring per-vcpu parameters
between libxc and hypervisor.

2) Handler of XEN_DOMCTL_SCHEDOP_getinfo now just returns the default budget and
period values of RTDS scheduler.

3) Handler of XEN_DOMCTL_SCHEDOP_getvcpuinfo now can return a random subset of
the parameters of the VCPUs of a specific domain

CC: <dario.faggioli@citrix.com>
CC: <george.dunlap@eu.citrix.com>
CC: <dgolomb@seas.upenn.edu>
CC: <mengxu@cis.upenn.edu>
CC: <jbeulich@suse.com>
CC: <lichong659@gmail.com>
---
 xen/common/sched_credit.c   |  17 ++++---
 xen/common/sched_credit2.c  |  16 ++++---
 xen/common/sched_rt.c       | 114 ++++++++++++++++++++++++++++++++++++++------
 xen/common/schedule.c       |  15 ++++--
 xen/include/public/domctl.h |  63 +++++++++++++++++++-----
 5 files changed, 183 insertions(+), 42 deletions(-)

Comments

Dario Faggioli March 17, 2016, 10:03 a.m. UTC | #1
On Wed, 2016-03-16 at 11:47 -0500, Chong Li wrote:
> --- a/xen/common/sched_credit.c
> +++ b/xen/common/sched_credit.c
> @@ -1054,15 +1054,16 @@ csched_dom_cntl(
>       * lock. Runq lock not needed anywhere in here. */
>      spin_lock_irqsave(&prv->lock, flags);
>  
> -    if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo )
> +    switch ( op->cmd )
>      {
> +    case XEN_DOMCTL_SCHEDOP_putvcpuinfo:
> +    case XEN_DOMCTL_SCHEDOP_getvcpuinfo:
> +        return -EINVAL;
> +    case XEN_DOMCTL_SCHEDOP_getinfo:
>          op->u.credit.weight = sdom->weight;
>          op->u.credit.cap = sdom->cap;
>
Not feeling to strong about it, but I think

    switch ( op->cmd )
    {
    case XEN_DOMCTL_SCHEDOP_getinfo:
        op->u.credit.weight = sdom->weight;
        op->u.credit.cap = sdom->cap;
        break;
    case XEN_DOMCTL_SCHEDOP_putinfo:
        if ( op->u.credit.weight != 0 )
        {
            if ( !list_empty(&sdom->active_sdom_elem) )
            {
                prv->weight -= sdom->weight * sdom->active_vcpu_count;
                prv->weight += op->u.credit.weight * sdom->active_vcpu_count;
            }
            sdom->weight = op->u.credit.weight;
        }

        if ( op->u.credit.cap != (uint16_t)~0U )
            sdom->cap = op->u.credit.cap;
        break;
    case XEN_DOMCTL_SCHEDOP_putvcpuinfo:
    case XEN_DOMCTL_SCHEDOP_getvcpuinfo:
    default:
        return -EINVAL;
    }

(i.e., grouping the cases that needs only returning -EINVAL) is better,
the final result, more than the patch itself.

And the same for Credit2, of course.

> --- a/xen/common/sched_rt.c
> +++ b/xen/common/sched_rt.c
> @@ -1129,24 +1145,22 @@ rt_dom_cntl(
>      struct vcpu *v;
>      unsigned long flags;
>      int rc = 0;
> +    xen_domctl_schedparam_vcpu_t local_sched;
> +    s_time_t period, budget;
> +    uint32_t index = 0;
>  
>      switch ( op->cmd )
>      {
>      case XEN_DOMCTL_SCHEDOP_getinfo:
> -        if ( d->max_vcpus > 0 )
> -        {
> -            spin_lock_irqsave(&prv->lock, flags);
> -            svc = rt_vcpu(d->vcpu[0]);
> -            op->u.rtds.period = svc->period / MICROSECS(1);
> -            op->u.rtds.budget = svc->budget / MICROSECS(1);
> -            spin_unlock_irqrestore(&prv->lock, flags);
> -        }
> -        else
> -        {
> -            /* If we don't have vcpus yet, let's just return the
> defaults. */
> -            op->u.rtds.period = RTDS_DEFAULT_PERIOD;
> -            op->u.rtds.budget = RTDS_DEFAULT_BUDGET;
> -        }
> +        /* Return the default parameters.
> +         * A previous bug fixed here:
> +         * The default PERIOD or BUDGET should be divided by
> MICROSECS(1),
> +         * before returned to upper caller.
> +         */
Comment style (missing opening '/*').

Also, putting this kind of things in comments is not at all ideal. If
doing this in this patch, you should mention it in the changelog. Or
you do it in a separate patch (that you put before this one in the
series).

I'd say that, in this specific case, is not a big deal which one of the
two approaches you take (mentioning or separate patch), but the having
a separate one is indeed almost always preferable (e.g., the fix can be
backported).

> +        spin_lock_irqsave(&prv->lock, flags);
> +        op->u.rtds.period = RTDS_DEFAULT_PERIOD / MICROSECS(1);
> +        op->u.rtds.budget = RTDS_DEFAULT_BUDGET / MICROSECS(1);
> +        spin_unlock_irqrestore(&prv->lock, flags);
>
I don't think we need to take the lock here... RTDS_DEFAULT_PERIOD is
not going to change under our feet. :-)

> @@ -1163,6 +1177,78 @@ rt_dom_cntl(
>          }
>          spin_unlock_irqrestore(&prv->lock, flags);
>          break;
> +    case XEN_DOMCTL_SCHEDOP_getvcpuinfo:
> +        while ( index < op->u.v.nr_vcpus )
> +        {
> +            if ( copy_from_guest_offset(&local_sched,
> +                                        op->u.v.vcpus, index, 1) )
> +            {
> +                rc = -EFAULT;
> +                break;
> +            }
> +            if ( local_sched.vcpuid >= d->max_vcpus ||
> +                 d->vcpu[local_sched.vcpuid] == NULL )
> +            {
> +                rc = -EINVAL;
> +                break;
> +            }
> +
> +            spin_lock_irqsave(&prv->lock, flags);
> +            svc = rt_vcpu(d->vcpu[local_sched.vcpuid]);
> +            local_sched.u.rtds.budget = svc->budget / MICROSECS(1);
> +            local_sched.u.rtds.period = svc->period / MICROSECS(1);
> +            spin_unlock_irqrestore(&prv->lock, flags);
> +
> +            if ( copy_to_guest_offset(op->u.v.vcpus, index,
> +                                        &local_sched, 1) )
> +            {
> +                rc = -EFAULT;
> +                break;
> +            }
> +            if ( (++index > 0x3f) && hypercall_preempt_check() )
> +                break;
>
So, I know it's 0x3f in XEN_SYSCTL_pcitopoinfo, but unless I'm missing
something, I don't see why this can't be "63".

I'd also put a short comment right above, something like:

 /* Process a most 64 vCPUs without checking for preemptions. */

> +        }
> +        if ( !rc ) /* notify upper caller how many vcpus have been
> processed */
>
Move the comment to the line above (and terminate it with a full stop).

And by the way, there's a lot of code repetition here. What about
handling get and set together, sort of like this:

    case XEN_DOMCTL_SCHEDOP_getvcpuinfo:                                                                              |
    case XEN_DOMCTL_SCHEDOP_putvcpuinfo:                                                                              |#ifdef CONFIG_HAS_PCI
        while ( index < op->u.v.nr_vcpus )                                                                            |    case XEN_SYSCTL_pcitopoinfo:
        {                                                                                                             |    {
            if ( copy_from_guest_offset(&local_sched,                                                                 |        xen_sysctl_pcitopoinfo_t *ti = &op->u.pcitopoinfo;
                                        op->u.v.vcpus, index, 1) )                                                    |        unsigned int i = 0;
            {                                                                                                         |
                rc = -EFAULT;                                                                                         |        if ( guest_handle_is_null(ti->devs) ||
                break;                                                                                                |             guest_handle_is_null(ti->nodes) )
            }                                                                                                         |        {
                                                                                                                      |            ret = -EINVAL;
            if ( local_sched.vcpuid >= d->max_vcpus ||                                                                |            break;
                 d->vcpu[local_sched.vcpuid] == NULL )                                                                |        }
            {                                                                                                         |
                rc = -EINVAL;                                                                                         |        while ( i < ti->num_devs )
                break;                                                                                                |        {
            }                                                                                                         |            physdev_pci_device_t dev;
                                                                                                                      |            uint32_t node;
            if ( op->cmd == XEN_DOMCTL_SCHEDOP_getvcpuinfo )                                                          |            const struct pci_dev *pdev;
            {                                                                                                         |
                spin_lock_irqsave(&prv->lock, flags);                                                                 |            if ( copy_from_guest_offset(&dev, ti->devs, i, 1) )
                svc = rt_vcpu(d->vcpu[local_sched.vcpuid]);                                                           |            {
                local_sched.u.rtds.budget = svc->budget / MICROSECS(1);                                               |                ret = -EFAULT;
                local_sched.u.rtds.period = svc->period / MICROSECS(1);                                               |                break;
                spin_unlock_irqrestore(&prv->lock, flags);                                                            |            }
                                                                                                                      |
                if ( copy_to_guest_offset(op->u.v.vcpus, index,                                                       |            pcidevs_lock();
                                          &local_sched, 1) )                                                          |            pdev = pci_get_pdev(dev.seg, dev.bus, dev.devfn);
                {                                                                                                     |            if ( !pdev )
                    rc = -EFAULT;                                                                                     |                node = XEN_INVALID_DEV;
                    break;                                                                                            |            else if ( pdev->node == NUMA_NO_NODE )
                }                                                                                                     |                node = XEN_INVALID_NODE_ID;
            }                                                                                                         |            else
            else                                                                                                      |                node = pdev->node;
            {                                                                                                         |            pcidevs_unlock();
                period = MICROSECS(local_sched.u.rtds.period);                                                        |
                budget = MICROSECS(local_sched.u.rtds.budget);                                                        |            if ( copy_to_guest_offset(ti->nodes, i, &node, 1) )
                if ( period > RTDS_MAX_PERIOD || budget < RTDS_MIN_BUDGET ||                                          |            {
                     budget > period || period < RTDS_MIN_PERIOD )                                                    |                ret = -EFAULT;
                {                                                                                                     |                break;
                    rc = -EINVAL;                                                                                     |            }
                    break;                                                                                            |
                }                                                                                                     |            /* Process a most 64 vCPUs without checking for preemptions. */
                                                                                                                      |            if ( (++i > 0x3f) && hypercall_preempt_check() )
                spin_lock_irqsave(&prv->lock, flags);                                                                 |                break;
                svc = rt_vcpu(d->vcpu[local_sched.vcpuid]);                                                           |        }
                svc->period = period;                                                                                 |
                svc->budget = budget;                                                                                 |        if ( !ret && (ti->num_devs != i) )
                spin_unlock_irqrestore(&prv->lock, flags);                                                            |        {
                                                                                                                      |            ti->num_devs = i;
            }                                                                                                         |            if ( __copy_field_to_guest(u_sysctl, op, u.pcitopoinfo.num_devs) )
            /* Process a most 64 vCPUs without checking for preemptions. */                                           |                ret = -EFAULT;
            if ( (++index > 63) && hypercall_preempt_check() )                                                        |        }
                break;                                                                                                |        break;
        }                                                                                                             |    }
                                                                                                                      |#endif
        /* Notify upper caller how many vcpus have been processed. */                                                 |
        if ( !rc )                                                                                                    |    case XEN_SYSCTL_tmem_op:
            op->u.v.nr_vcpus = index;                                                                                 |        ret = tmem_control(&op->u.tmem_op);
        break;

I have only compile tested this, but it looks to me that it can fly...

> --- a/xen/include/public/domctl.h
> +++ b/xen/include/public/domctl.h

> + * Set or get info?
> + * For schedulers supporting per-vcpu settings (e.g., RTDS):
> + *  XEN_DOMCTL_SCHEDOP_putinfo sets params for all vcpus;
> + *  XEN_DOMCTL_SCHEDOP_getinfo gets default params;
> + *  XEN_DOMCTL_SCHEDOP_put(get)vcpuinfo sets (gets) params of vcpus;
> + *
> + * For schedulers not supporting per-vcpu settings:
> + *  XEN_DOMCTL_SCHEDOP_putinfo sets params for all vcpus;
> + *  XEN_DOMCTL_SCHEDOP_getinfo gets domain-wise params;
> + *  XEN_DOMCTL_SCHEDOP_put(get)vcpuinfo returns error;
> + */
>  #define XEN_DOMCTL_SCHEDOP_putinfo 0
>  #define XEN_DOMCTL_SCHEDOP_getinfo 1
> +#define XEN_DOMCTL_SCHEDOP_putvcpuinfo 2
> +#define XEN_DOMCTL_SCHEDOP_getvcpuinfo 3
>  struct xen_domctl_scheduler_op {
>      uint32_t sched_id;  /* XEN_SCHEDULER_* */
>      uint32_t cmd;       /* XEN_DOMCTL_SCHEDOP_* */

       /* IN/OUT */
>      union {
> -        struct xen_domctl_sched_credit {
> -            uint16_t weight;
> -            uint16_t cap;
> -        } credit;
> -        struct xen_domctl_sched_credit2 {
> -            uint16_t weight;
> -        } credit2;
> -        struct xen_domctl_sched_rtds {
> -            uint32_t period;
> -            uint32_t budget;
> -        } rtds;
> +        xen_domctl_sched_credit_t credit;
> +        xen_domctl_sched_credit2_t credit2;
> +        xen_domctl_sched_rtds_t rtds;
> +        struct {
> +            XEN_GUEST_HANDLE_64(xen_domctl_schedparam_vcpu_t) vcpus;
> +            /*
> +             * IN: Number of elements in vcpus array.
> +             * OUT: Number of processed elements of vcpus array.
> +             */
> +            uint32_t nr_vcpus;
> +            uint32_t padding;
> +        } v;
>      } u;
>  };
>
That is: make it even more clear that the whole union is used as
IN/OUT.

Then, indeed, inside v, what is the meaning of the nr_vcpus field in
each direction, as you're doing already.

Regards,
Dario
Chong Li March 17, 2016, 8:42 p.m. UTC | #2
On Thu, Mar 17, 2016 at 5:03 AM, Dario Faggioli
<dario.faggioli@citrix.com> wrote:
> On Wed, 2016-03-16 at 11:47 -0500, Chong Li wrote:

>
>> --- a/xen/common/sched_rt.c
>> +++ b/xen/common/sched_rt.c
>> @@ -1129,24 +1145,22 @@ rt_dom_cntl(
>>      struct vcpu *v;
>>      unsigned long flags;
>>      int rc = 0;
>> +    xen_domctl_schedparam_vcpu_t local_sched;
>> +    s_time_t period, budget;
>> +    uint32_t index = 0;
>>
>>      switch ( op->cmd )
>>      {
>>      case XEN_DOMCTL_SCHEDOP_getinfo:
>> -        if ( d->max_vcpus > 0 )
>> -        {
>> -            spin_lock_irqsave(&prv->lock, flags);
>> -            svc = rt_vcpu(d->vcpu[0]);
>> -            op->u.rtds.period = svc->period / MICROSECS(1);
>> -            op->u.rtds.budget = svc->budget / MICROSECS(1);
>> -            spin_unlock_irqrestore(&prv->lock, flags);
>> -        }
>> -        else
>> -        {
>> -            /* If we don't have vcpus yet, let's just return the
>> defaults. */
>> -            op->u.rtds.period = RTDS_DEFAULT_PERIOD;
>> -            op->u.rtds.budget = RTDS_DEFAULT_BUDGET;
>> -        }
>> +        /* Return the default parameters.
>> +         * A previous bug fixed here:
>> +         * The default PERIOD or BUDGET should be divided by
>> MICROSECS(1),
>> +         * before returned to upper caller.
>> +         */
> Comment style (missing opening '/*').
>
> Also, putting this kind of things in comments is not at all ideal. If
> doing this in this patch, you should mention it in the changelog. Or
> you do it in a separate patch (that you put before this one in the
> series).
>
> I'd say that, in this specific case, is not a big deal which one of the
> two approaches you take (mentioning or separate patch), but the having
> a separate one is indeed almost always preferable (e.g., the fix can be
> backported).

If I choose mentioning, do I move the comment to the changelog? Or do I keep
it here and say it again in the changelog?

Chong
Jan Beulich March 18, 2016, 7:39 a.m. UTC | #3
>>> On 17.03.16 at 21:42, <lichong659@gmail.com> wrote:
> On Thu, Mar 17, 2016 at 5:03 AM, Dario Faggioli
> <dario.faggioli@citrix.com> wrote:
>> On Wed, 2016-03-16 at 11:47 -0500, Chong Li wrote:
> 
>>
>>> --- a/xen/common/sched_rt.c
>>> +++ b/xen/common/sched_rt.c
>>> @@ -1129,24 +1145,22 @@ rt_dom_cntl(
>>>      struct vcpu *v;
>>>      unsigned long flags;
>>>      int rc = 0;
>>> +    xen_domctl_schedparam_vcpu_t local_sched;
>>> +    s_time_t period, budget;
>>> +    uint32_t index = 0;
>>>
>>>      switch ( op->cmd )
>>>      {
>>>      case XEN_DOMCTL_SCHEDOP_getinfo:
>>> -        if ( d->max_vcpus > 0 )
>>> -        {
>>> -            spin_lock_irqsave(&prv->lock, flags);
>>> -            svc = rt_vcpu(d->vcpu[0]);
>>> -            op->u.rtds.period = svc->period / MICROSECS(1);
>>> -            op->u.rtds.budget = svc->budget / MICROSECS(1);
>>> -            spin_unlock_irqrestore(&prv->lock, flags);
>>> -        }
>>> -        else
>>> -        {
>>> -            /* If we don't have vcpus yet, let's just return the
>>> defaults. */
>>> -            op->u.rtds.period = RTDS_DEFAULT_PERIOD;
>>> -            op->u.rtds.budget = RTDS_DEFAULT_BUDGET;
>>> -        }
>>> +        /* Return the default parameters.
>>> +         * A previous bug fixed here:
>>> +         * The default PERIOD or BUDGET should be divided by
>>> MICROSECS(1),
>>> +         * before returned to upper caller.
>>> +         */
>> Comment style (missing opening '/*').
>>
>> Also, putting this kind of things in comments is not at all ideal. If
>> doing this in this patch, you should mention it in the changelog. Or
>> you do it in a separate patch (that you put before this one in the
>> series).
>>
>> I'd say that, in this specific case, is not a big deal which one of the
>> two approaches you take (mentioning or separate patch), but the having
>> a separate one is indeed almost always preferable (e.g., the fix can be
>> backported).
> 
> If I choose mentioning, do I move the comment to the changelog? Or do I keep
> it here and say it again in the changelog?

Just consider what would happen if everyone mentioned in
comments the bugs they fixed. I think the answer to you question
is obvious then...

Jan
Jan Beulich March 18, 2016, 7:48 a.m. UTC | #4
>>> On 17.03.16 at 11:03, <dario.faggioli@citrix.com> wrote:
> On Wed, 2016-03-16 at 11:47 -0500, Chong Li wrote:
>> --- a/xen/common/sched_credit.c
>> +++ b/xen/common/sched_credit.c
>> @@ -1054,15 +1054,16 @@ csched_dom_cntl(
>>       * lock. Runq lock not needed anywhere in here. */
>>      spin_lock_irqsave(&prv->lock, flags);
>>  
>> -    if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo )
>> +    switch ( op->cmd )
>>      {
>> +    case XEN_DOMCTL_SCHEDOP_putvcpuinfo:
>> +    case XEN_DOMCTL_SCHEDOP_getvcpuinfo:
>> +        return -EINVAL;
>> +    case XEN_DOMCTL_SCHEDOP_getinfo:
>>          op->u.credit.weight = sdom->weight;
>>          op->u.credit.cap = sdom->cap;
>>
> Not feeling to strong about it, but I think
> 
>     switch ( op->cmd )
>     {
>     case XEN_DOMCTL_SCHEDOP_getinfo:
>         op->u.credit.weight = sdom->weight;
>         op->u.credit.cap = sdom->cap;
>         break;
>     case XEN_DOMCTL_SCHEDOP_putinfo:
>         if ( op->u.credit.weight != 0 )
>         {
>             if ( !list_empty(&sdom->active_sdom_elem) )
>             {
>                 prv->weight -= sdom->weight * sdom->active_vcpu_count;
>                 prv->weight += op->u.credit.weight * sdom->active_vcpu_count;
>             }
>             sdom->weight = op->u.credit.weight;
>         }
> 
>         if ( op->u.credit.cap != (uint16_t)~0U )
>             sdom->cap = op->u.credit.cap;
>         break;
>     case XEN_DOMCTL_SCHEDOP_putvcpuinfo:
>     case XEN_DOMCTL_SCHEDOP_getvcpuinfo:
>     default:
>         return -EINVAL;
>     }
> 
> (i.e., grouping the cases that needs only returning -EINVAL) is better,
> the final result, more than the patch itself.

In fact there's no point in explicitly naming the unhandled values
as long as they're matching what "default:" does.

Jan
Dario Faggioli March 18, 2016, 10:47 a.m. UTC | #5
On Fri, 2016-03-18 at 01:39 -0600, Jan Beulich wrote:
> > 
> > > > On 17.03.16 at 21:42, <lichong659@gmail.com> wrote:
> > On Thu, Mar 17, 2016 at 5:03 AM, Dario Faggioli
> > <dario.faggioli@citrix.com> wrote:
> > > 
> > > I'd say that, in this specific case, is not a big deal which one
> > > of the
> > > two approaches you take (mentioning or separate patch), but the
> > > having
> > > a separate one is indeed almost always preferable (e.g., the fix
> > > can be
> > > backported).
> > If I choose mentioning, do I move the comment to the changelog? Or
> > do I keep
> > it here and say it again in the changelog?
> Just consider what would happen if everyone mentioned in
> comments the bugs they fixed. I think the answer to you question
> is obvious then...
>
Exactly! :-)

And, please (Chong), let me restate this: "having a separate one is
indeed almost always preferable (e.g., the fix can be backported)"

Regards,
Dario
Chong Li March 18, 2016, 8:22 p.m. UTC | #6
On Fri, Mar 18, 2016 at 5:47 AM, Dario Faggioli
<dario.faggioli@citrix.com> wrote:
> On Fri, 2016-03-18 at 01:39 -0600, Jan Beulich wrote:
>> >
>> > > > On 17.03.16 at 21:42, <lichong659@gmail.com> wrote:
>> > On Thu, Mar 17, 2016 at 5:03 AM, Dario Faggioli
>> > <dario.faggioli@citrix.com> wrote:
>> > >
>> > > I'd say that, in this specific case, is not a big deal which one
>> > > of the
>> > > two approaches you take (mentioning or separate patch), but the
>> > > having
>> > > a separate one is indeed almost always preferable (e.g., the fix
>> > > can be
>> > > backported).
>> > If I choose mentioning, do I move the comment to the changelog? Or
>> > do I keep
>> > it here and say it again in the changelog?
>> Just consider what would happen if everyone mentioned in
>> comments the bugs they fixed. I think the answer to you question
>> is obvious then...
>>
> Exactly! :-)
>
> And, please (Chong), let me restate this: "having a separate one is
> indeed almost always preferable (e.g., the fix can be backported)"
>
Considering the complexity of making a new patch and
today's the last day for posting, I'll mention this bug fix in the changlog.

Chong
diff mbox

Patch

diff --git a/xen/common/sched_credit.c b/xen/common/sched_credit.c
index 0dce790..82b0d14 100644
--- a/xen/common/sched_credit.c
+++ b/xen/common/sched_credit.c
@@ -1054,15 +1054,16 @@  csched_dom_cntl(
      * lock. Runq lock not needed anywhere in here. */
     spin_lock_irqsave(&prv->lock, flags);
 
-    if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo )
+    switch ( op->cmd )
     {
+    case XEN_DOMCTL_SCHEDOP_putvcpuinfo:
+    case XEN_DOMCTL_SCHEDOP_getvcpuinfo:
+        return -EINVAL;
+    case XEN_DOMCTL_SCHEDOP_getinfo:
         op->u.credit.weight = sdom->weight;
         op->u.credit.cap = sdom->cap;
-    }
-    else
-    {
-        ASSERT(op->cmd == XEN_DOMCTL_SCHEDOP_putinfo);
-
+        break;
+    case XEN_DOMCTL_SCHEDOP_putinfo:
         if ( op->u.credit.weight != 0 )
         {
             if ( !list_empty(&sdom->active_sdom_elem) )
@@ -1075,7 +1076,9 @@  csched_dom_cntl(
 
         if ( op->u.credit.cap != (uint16_t)~0U )
             sdom->cap = op->u.credit.cap;
-
+        break;
+    default:
+        return -EINVAL;
     }
 
     spin_unlock_irqrestore(&prv->lock, flags);
diff --git a/xen/common/sched_credit2.c b/xen/common/sched_credit2.c
index 3c49ffa..46d54bc 100644
--- a/xen/common/sched_credit2.c
+++ b/xen/common/sched_credit2.c
@@ -1421,14 +1421,15 @@  csched2_dom_cntl(
      * runq lock to update csvcs. */
     spin_lock_irqsave(&prv->lock, flags);
 
-    if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo )
+    switch ( op->cmd )
     {
+    case XEN_DOMCTL_SCHEDOP_putvcpuinfo:
+    case XEN_DOMCTL_SCHEDOP_getvcpuinfo:
+        return -EINVAL;
+    case XEN_DOMCTL_SCHEDOP_getinfo:
         op->u.credit2.weight = sdom->weight;
-    }
-    else
-    {
-        ASSERT(op->cmd == XEN_DOMCTL_SCHEDOP_putinfo);
-
+        break;
+    case XEN_DOMCTL_SCHEDOP_putinfo:
         if ( op->u.credit2.weight != 0 )
         {
             struct vcpu *v;
@@ -1457,6 +1458,9 @@  csched2_dom_cntl(
                 vcpu_schedule_unlock(lock, svc->vcpu);
             }
         }
+        break;
+    default:
+        return -EINVAL;
     }
 
     spin_unlock_irqrestore(&prv->lock, flags);
diff --git a/xen/common/sched_rt.c b/xen/common/sched_rt.c
index 3f1d047..359c2db 100644
--- a/xen/common/sched_rt.c
+++ b/xen/common/sched_rt.c
@@ -86,6 +86,22 @@ 
 #define RTDS_DEFAULT_PERIOD     (MICROSECS(10000))
 #define RTDS_DEFAULT_BUDGET     (MICROSECS(4000))
 
+/*
+ * Max period: max delta of time type, because period is added to the time
+ * a vcpu activates, so this must not overflow.
+ * Min period: 10 us, considering the scheduling overhead (when period is
+ * too low, scheduling is invoked too frequently, causing high overhead).
+ */
+#define RTDS_MAX_PERIOD     (STIME_DELTA_MAX)
+#define RTDS_MIN_PERIOD     (MICROSECS(10))
+
+/*
+ * Min budget: 10 us, considering the scheduling overhead (when budget is
+ * consumed too fast, scheduling is invoked too frequently, causing
+ * high overhead).
+ */
+#define RTDS_MIN_BUDGET     (MICROSECS(10))
+
 #define UPDATE_LIMIT_SHIFT      10
 #define MAX_SCHEDULE            (MILLISECS(1))
 /*
@@ -1129,24 +1145,22 @@  rt_dom_cntl(
     struct vcpu *v;
     unsigned long flags;
     int rc = 0;
+    xen_domctl_schedparam_vcpu_t local_sched;
+    s_time_t period, budget;
+    uint32_t index = 0;
 
     switch ( op->cmd )
     {
     case XEN_DOMCTL_SCHEDOP_getinfo:
-        if ( d->max_vcpus > 0 )
-        {
-            spin_lock_irqsave(&prv->lock, flags);
-            svc = rt_vcpu(d->vcpu[0]);
-            op->u.rtds.period = svc->period / MICROSECS(1);
-            op->u.rtds.budget = svc->budget / MICROSECS(1);
-            spin_unlock_irqrestore(&prv->lock, flags);
-        }
-        else
-        {
-            /* If we don't have vcpus yet, let's just return the defaults. */
-            op->u.rtds.period = RTDS_DEFAULT_PERIOD;
-            op->u.rtds.budget = RTDS_DEFAULT_BUDGET;
-        }
+        /* Return the default parameters.
+         * A previous bug fixed here:
+         * The default PERIOD or BUDGET should be divided by MICROSECS(1),
+         * before returned to upper caller.
+         */
+        spin_lock_irqsave(&prv->lock, flags);
+        op->u.rtds.period = RTDS_DEFAULT_PERIOD / MICROSECS(1);
+        op->u.rtds.budget = RTDS_DEFAULT_BUDGET / MICROSECS(1);
+        spin_unlock_irqrestore(&prv->lock, flags);
         break;
     case XEN_DOMCTL_SCHEDOP_putinfo:
         if ( op->u.rtds.period == 0 || op->u.rtds.budget == 0 )
@@ -1163,6 +1177,78 @@  rt_dom_cntl(
         }
         spin_unlock_irqrestore(&prv->lock, flags);
         break;
+    case XEN_DOMCTL_SCHEDOP_getvcpuinfo:
+        while ( index < op->u.v.nr_vcpus )
+        {
+            if ( copy_from_guest_offset(&local_sched,
+                                        op->u.v.vcpus, index, 1) )
+            {
+                rc = -EFAULT;
+                break;
+            }
+            if ( local_sched.vcpuid >= d->max_vcpus ||
+                 d->vcpu[local_sched.vcpuid] == NULL )
+            {
+                rc = -EINVAL;
+                break;
+            }
+
+            spin_lock_irqsave(&prv->lock, flags);
+            svc = rt_vcpu(d->vcpu[local_sched.vcpuid]);
+            local_sched.u.rtds.budget = svc->budget / MICROSECS(1);
+            local_sched.u.rtds.period = svc->period / MICROSECS(1);
+            spin_unlock_irqrestore(&prv->lock, flags);
+
+            if ( copy_to_guest_offset(op->u.v.vcpus, index,
+                                        &local_sched, 1) )
+            {
+                rc = -EFAULT;
+                break;
+            }
+            if ( (++index > 0x3f) && hypercall_preempt_check() )
+                break;
+        }
+        if ( !rc ) /* notify upper caller how many vcpus have been processed */
+            op->u.v.nr_vcpus = index;
+        break;
+
+    case XEN_DOMCTL_SCHEDOP_putvcpuinfo:
+        while ( index < op->u.v.nr_vcpus )
+        {
+            if ( copy_from_guest_offset(&local_sched,
+                                        op->u.v.vcpus, index, 1) )
+            {
+                rc = -EFAULT;
+                break;
+            }
+            if ( local_sched.vcpuid >= d->max_vcpus ||
+                 d->vcpu[local_sched.vcpuid] == NULL )
+            {
+                rc = -EINVAL;
+                break;
+            }
+
+            period = MICROSECS(local_sched.u.rtds.period);
+            budget = MICROSECS(local_sched.u.rtds.budget);
+            if ( period > RTDS_MAX_PERIOD || budget < RTDS_MIN_BUDGET ||
+                 budget > period || period < RTDS_MIN_PERIOD )
+            {
+                rc = -EINVAL;
+                break;
+            }
+
+            spin_lock_irqsave(&prv->lock, flags);
+            svc = rt_vcpu(d->vcpu[local_sched.vcpuid]);
+            svc->period = period;
+            svc->budget = budget;
+            spin_unlock_irqrestore(&prv->lock, flags);
+
+            if ( (++index > 0x3f) && hypercall_preempt_check() )
+                break;
+        }
+        if ( !rc ) /* notify upper caller how many vcpus have been processed */
+            op->u.v.nr_vcpus = index;
+        break;
     }
 
     return rc;
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index c195129..cd25e08 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -1148,11 +1148,20 @@  long sched_adjust(struct domain *d, struct xen_domctl_scheduler_op *op)
     if ( ret )
         return ret;
 
-    if ( (op->sched_id != DOM2OP(d)->sched_id) ||
-         ((op->cmd != XEN_DOMCTL_SCHEDOP_putinfo) &&
-          (op->cmd != XEN_DOMCTL_SCHEDOP_getinfo)) )
+    if ( op->sched_id != DOM2OP(d)->sched_id )
         return -EINVAL;
 
+    switch ( op->cmd )
+    {
+    case XEN_DOMCTL_SCHEDOP_putinfo:
+    case XEN_DOMCTL_SCHEDOP_getinfo:
+    case XEN_DOMCTL_SCHEDOP_putvcpuinfo:
+    case XEN_DOMCTL_SCHEDOP_getvcpuinfo:
+        break;
+    default:
+        return -EINVAL;
+    }
+
     /* NB: the pluggable scheduler code needs to take care
      * of locking by itself. */
     if ( (ret = SCHED_OP(DOM2OP(d), adjust, d, op)) == 0 )
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 7a56b3f..9297c01 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -338,24 +338,63 @@  DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_vcpus_t);
 #define XEN_SCHEDULER_ARINC653 7
 #define XEN_SCHEDULER_RTDS     8
 
-/* Set or get info? */
+typedef struct xen_domctl_sched_credit {
+    uint16_t weight;
+    uint16_t cap;
+} xen_domctl_sched_credit_t;
+
+typedef struct xen_domctl_sched_credit2 {
+    uint16_t weight;
+} xen_domctl_sched_credit2_t;
+
+typedef struct xen_domctl_sched_rtds {
+    uint32_t period;
+    uint32_t budget;
+} xen_domctl_sched_rtds_t;
+
+typedef struct xen_domctl_schedparam_vcpu {
+    union {
+        xen_domctl_sched_credit_t credit;
+        xen_domctl_sched_credit2_t credit2;
+        xen_domctl_sched_rtds_t rtds;
+    } u;
+    uint32_t vcpuid;
+    uint16_t padding[2];
+} xen_domctl_schedparam_vcpu_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_schedparam_vcpu_t);
+
+/*
+ * Set or get info?
+ * For schedulers supporting per-vcpu settings (e.g., RTDS):
+ *  XEN_DOMCTL_SCHEDOP_putinfo sets params for all vcpus;
+ *  XEN_DOMCTL_SCHEDOP_getinfo gets default params;
+ *  XEN_DOMCTL_SCHEDOP_put(get)vcpuinfo sets (gets) params of vcpus;
+ *
+ * For schedulers not supporting per-vcpu settings:
+ *  XEN_DOMCTL_SCHEDOP_putinfo sets params for all vcpus;
+ *  XEN_DOMCTL_SCHEDOP_getinfo gets domain-wise params;
+ *  XEN_DOMCTL_SCHEDOP_put(get)vcpuinfo returns error;
+ */
 #define XEN_DOMCTL_SCHEDOP_putinfo 0
 #define XEN_DOMCTL_SCHEDOP_getinfo 1
+#define XEN_DOMCTL_SCHEDOP_putvcpuinfo 2
+#define XEN_DOMCTL_SCHEDOP_getvcpuinfo 3
 struct xen_domctl_scheduler_op {
     uint32_t sched_id;  /* XEN_SCHEDULER_* */
     uint32_t cmd;       /* XEN_DOMCTL_SCHEDOP_* */
     union {
-        struct xen_domctl_sched_credit {
-            uint16_t weight;
-            uint16_t cap;
-        } credit;
-        struct xen_domctl_sched_credit2 {
-            uint16_t weight;
-        } credit2;
-        struct xen_domctl_sched_rtds {
-            uint32_t period;
-            uint32_t budget;
-        } rtds;
+        xen_domctl_sched_credit_t credit;
+        xen_domctl_sched_credit2_t credit2;
+        xen_domctl_sched_rtds_t rtds;
+        struct {
+            XEN_GUEST_HANDLE_64(xen_domctl_schedparam_vcpu_t) vcpus;
+            /*
+             * IN: Number of elements in vcpus array.
+             * OUT: Number of processed elements of vcpus array.
+             */
+            uint32_t nr_vcpus;
+            uint32_t padding;
+        } v;
     } u;
 };
 typedef struct xen_domctl_scheduler_op xen_domctl_scheduler_op_t;