
[RFC,v1,4/6] xentop: collect IRQ and HYP time statistics.

Message ID 20200612002205.174295-5-volodymyr_babchuk@epam.com (mailing list archive)
State New, archived
Series: Fair scheduling

Commit Message

Volodymyr Babchuk June 12, 2020, 12:22 a.m. UTC
As the scheduler code now collects the time spent in IRQ handlers and
in do_softirq(), we can expose those values to userspace tools such as
xentop, so a system administrator can see how the system behaves.

We update the counters only in sched_get_time_correction() to minimize
the number of spinlocks taken. As atomic_t is only 32 bits wide, it is
not enough to store time with nanosecond precision, so we need to use
64-bit variables and protect them with a spinlock.

Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@epam.com>
---
 xen/common/sched/core.c     | 17 +++++++++++++++++
 xen/common/sysctl.c         |  1 +
 xen/include/public/sysctl.h |  4 +++-
 xen/include/xen/sched.h     |  2 ++
 4 files changed, 23 insertions(+), 1 deletion(-)
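
For context, a hedged sketch of how a consumer such as xentop could read the
two new counters through libxc, assuming the existing xc_physinfo() wrapper;
the irq_time/hyp_time fields exist only with this patch applied, and error
handling is trimmed.

#include <inttypes.h>
#include <stdio.h>
#include <xenctrl.h>

int main(void)
{
    xc_interface *xch = xc_interface_open(NULL, NULL, 0);
    xc_physinfo_t info = { 0 };

    if ( !xch || xc_physinfo(xch, &info) )
        return 1;

    /* Cumulative time, in nanoseconds, spent in IRQ handlers and in
     * hypervisor (softirq) work since boot. */
    printf("IRQ time: %" PRIu64 " ns, HYP time: %" PRIu64 " ns\n",
           info.irq_time, info.hyp_time);

    xc_interface_close(xch);
    return 0;
}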

Comments

Jürgen Groß June 12, 2020, 4:57 a.m. UTC | #1
On 12.06.20 02:22, Volodymyr Babchuk wrote:
> As scheduler code now collects time spent in IRQ handlers and in
> do_softirq(), we can present those values to userspace tools like
> xentop, so system administrator can see how system behaves.
> 
> We are updating counters only in sched_get_time_correction() function
> to minimize number of taken spinlocks. As atomic_t is 32 bit wide, it
> is not enough to store time with nanosecond precision. So we need to
> use 64 bit variables and protect them with spinlock.
> 
> Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@epam.com>
> ---
>   xen/common/sched/core.c     | 17 +++++++++++++++++
>   xen/common/sysctl.c         |  1 +
>   xen/include/public/sysctl.h |  4 +++-
>   xen/include/xen/sched.h     |  2 ++
>   4 files changed, 23 insertions(+), 1 deletion(-)
> 
> diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
> index a7294ff5c3..ee6b1d9161 100644
> --- a/xen/common/sched/core.c
> +++ b/xen/common/sched/core.c
> @@ -95,6 +95,10 @@ static struct scheduler __read_mostly ops;
>   
>   static bool scheduler_active;
>   
> +static DEFINE_SPINLOCK(sched_stat_lock);
> +s_time_t sched_stat_irq_time;
> +s_time_t sched_stat_hyp_time;
> +
>   static void sched_set_affinity(
>       struct sched_unit *unit, const cpumask_t *hard, const cpumask_t *soft);
>   
> @@ -994,9 +998,22 @@ s_time_t sched_get_time_correction(struct sched_unit *u)
>               break;
>       }
>   
> +    spin_lock_irqsave(&sched_stat_lock, flags);
> +    sched_stat_irq_time += irq;
> +    sched_stat_hyp_time += hyp;
> +    spin_unlock_irqrestore(&sched_stat_lock, flags);

Please don't use a lock. Just use add_sized() instead which will add
atomically.

> +
>       return irq + hyp;
>   }
>   
> +void sched_get_time_stats(uint64_t *irq_time, uint64_t *hyp_time)
> +{
> +    unsigned long flags;
> +
> +    spin_lock_irqsave(&sched_stat_lock, flags);
> +    *irq_time = sched_stat_irq_time;
> +    *hyp_time = sched_stat_hyp_time;
> +    spin_unlock_irqrestore(&sched_stat_lock, flags);

read_atomic() will do the job without a lock.

>   }
>   
>   /*
> diff --git a/xen/common/sysctl.c b/xen/common/sysctl.c
> index 1c6a817476..00683bc93f 100644
> --- a/xen/common/sysctl.c
> +++ b/xen/common/sysctl.c
> @@ -270,6 +270,7 @@ long do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t) u_sysctl)
>           pi->scrub_pages = 0;
>           pi->cpu_khz = cpu_khz;
>           pi->max_mfn = get_upper_mfn_bound();
> +        sched_get_time_stats(&pi->irq_time, &pi->hyp_time);
>           arch_do_physinfo(pi);
>           if ( iommu_enabled )
>           {
> diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
> index 3a08c512e8..f320144d40 100644
> --- a/xen/include/public/sysctl.h
> +++ b/xen/include/public/sysctl.h
> @@ -35,7 +35,7 @@
>   #include "domctl.h"
>   #include "physdev.h"
>   
> -#define XEN_SYSCTL_INTERFACE_VERSION 0x00000013
> +#define XEN_SYSCTL_INTERFACE_VERSION 0x00000014
>   
>   /*
>    * Read console content from Xen buffer ring.
> @@ -118,6 +118,8 @@ struct xen_sysctl_physinfo {
>       uint64_aligned_t scrub_pages;
>       uint64_aligned_t outstanding_pages;
>       uint64_aligned_t max_mfn; /* Largest possible MFN on this host */
> +    uint64_aligned_t irq_time;
> +    uint64_aligned_t hyp_time;

Would hypfs work, too? This would avoid the need for extending another
hypercall.


Juergen
Volodymyr Babchuk June 12, 2020, 11:44 a.m. UTC | #2
On Fri, 2020-06-12 at 06:57 +0200, Jürgen Groß wrote:
> On 12.06.20 02:22, Volodymyr Babchuk wrote:
> > As scheduler code now collects time spent in IRQ handlers and in
> > do_softirq(), we can present those values to userspace tools like
> > xentop, so system administrator can see how system behaves.
> > 
> > We are updating counters only in sched_get_time_correction() function
> > to minimize number of taken spinlocks. As atomic_t is 32 bit wide, it
> > is not enough to store time with nanosecond precision. So we need to
> > use 64 bit variables and protect them with spinlock.
> > 
> > Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@epam.com>
> > ---
> >   xen/common/sched/core.c     | 17 +++++++++++++++++
> >   xen/common/sysctl.c         |  1 +
> >   xen/include/public/sysctl.h |  4 +++-
> >   xen/include/xen/sched.h     |  2 ++
> >   4 files changed, 23 insertions(+), 1 deletion(-)
> > 
> > diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
> > index a7294ff5c3..ee6b1d9161 100644
> > --- a/xen/common/sched/core.c
> > +++ b/xen/common/sched/core.c
> > @@ -95,6 +95,10 @@ static struct scheduler __read_mostly ops;
> >   
> >   static bool scheduler_active;
> >   
> > +static DEFINE_SPINLOCK(sched_stat_lock);
> > +s_time_t sched_stat_irq_time;
> > +s_time_t sched_stat_hyp_time;
> > +
> >   static void sched_set_affinity(
> >       struct sched_unit *unit, const cpumask_t *hard, const cpumask_t *soft);
> >   
> > @@ -994,9 +998,22 @@ s_time_t sched_get_time_correction(struct sched_unit *u)
> >               break;
> >       }
> >   
> > +    spin_lock_irqsave(&sched_stat_lock, flags);
> > +    sched_stat_irq_time += irq;
> > +    sched_stat_hyp_time += hyp;
> > +    spin_unlock_irqrestore(&sched_stat_lock, flags);
> 
> Please don't use a lock. Just use add_sized() instead which will add
> atomically.

Looks like Arm does not support add_sized() on 64-bit variables.

Julien, I believe this is an Armv7 limitation? Should Armv8 work with
64-bit atomics?

> > +
> >       return irq + hyp;
> >   }
> >   
> > +void sched_get_time_stats(uint64_t *irq_time, uint64_t *hyp_time)
> > +{
> > +    unsigned long flags;
> > +
> > +    spin_lock_irqsave(&sched_stat_lock, flags);
> > +    *irq_time = sched_stat_irq_time;
> > +    *hyp_time = sched_stat_hyp_time;
> > +    spin_unlock_irqrestore(&sched_stat_lock, flags);
> 
> read_atomic() will do the job without lock.

Yes, I really want to use atomics there. Just need to clarify 64 bit
support on ARM.

> >   }
> >   
> >   /*
> > diff --git a/xen/common/sysctl.c b/xen/common/sysctl.c
> > index 1c6a817476..00683bc93f 100644
> > --- a/xen/common/sysctl.c
> > +++ b/xen/common/sysctl.c
> > @@ -270,6 +270,7 @@ long do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t) u_sysctl)
> >           pi->scrub_pages = 0;
> >           pi->cpu_khz = cpu_khz;
> >           pi->max_mfn = get_upper_mfn_bound();
> > +        sched_get_time_stats(&pi->irq_time, &pi->hyp_time);
> >           arch_do_physinfo(pi);
> >           if ( iommu_enabled )
> >           {
> > diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
> > index 3a08c512e8..f320144d40 100644
> > --- a/xen/include/public/sysctl.h
> > +++ b/xen/include/public/sysctl.h
> > @@ -35,7 +35,7 @@
> >   #include "domctl.h"
> >   #include "physdev.h"
> >   
> > -#define XEN_SYSCTL_INTERFACE_VERSION 0x00000013
> > +#define XEN_SYSCTL_INTERFACE_VERSION 0x00000014
> >   
> >   /*
> >    * Read console content from Xen buffer ring.
> > @@ -118,6 +118,8 @@ struct xen_sysctl_physinfo {
> >       uint64_aligned_t scrub_pages;
> >       uint64_aligned_t outstanding_pages;
> >       uint64_aligned_t max_mfn; /* Largest possible MFN on this host */
> > +    uint64_aligned_t irq_time;
> > +    uint64_aligned_t hyp_time;
> 
> Would hypfs work, too? This would avoid the need for extending another
> hypercall.

Good point. I'll take a look at this from the toolstack side. I didn't
see any hypfs calls in xentop, but this is a good time to begin using
it.
Julien Grall June 12, 2020, 12:29 p.m. UTC | #3
Hi Juergen,

On 12/06/2020 05:57, Jürgen Groß wrote:
> On 12.06.20 02:22, Volodymyr Babchuk wrote:
>> As scheduler code now collects time spent in IRQ handlers and in
>> do_softirq(), we can present those values to userspace tools like
>> xentop, so system administrator can see how system behaves.
>>
>> We are updating counters only in sched_get_time_correction() function
>> to minimize number of taken spinlocks. As atomic_t is 32 bit wide, it
>> is not enough to store time with nanosecond precision. So we need to
>> use 64 bit variables and protect them with spinlock.
>>
>> Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@epam.com>
>> ---
>>   xen/common/sched/core.c     | 17 +++++++++++++++++
>>   xen/common/sysctl.c         |  1 +
>>   xen/include/public/sysctl.h |  4 +++-
>>   xen/include/xen/sched.h     |  2 ++
>>   4 files changed, 23 insertions(+), 1 deletion(-)
>>
>> diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
>> index a7294ff5c3..ee6b1d9161 100644
>> --- a/xen/common/sched/core.c
>> +++ b/xen/common/sched/core.c
>> @@ -95,6 +95,10 @@ static struct scheduler __read_mostly ops;
>>   static bool scheduler_active;
>> +static DEFINE_SPINLOCK(sched_stat_lock);
>> +s_time_t sched_stat_irq_time;
>> +s_time_t sched_stat_hyp_time;
>> +
>>   static void sched_set_affinity(
>>       struct sched_unit *unit, const cpumask_t *hard, const cpumask_t 
>> *soft);
>> @@ -994,9 +998,22 @@ s_time_t sched_get_time_correction(struct 
>> sched_unit *u)
>>               break;
>>       }
>> +    spin_lock_irqsave(&sched_stat_lock, flags);
>> +    sched_stat_irq_time += irq;
>> +    sched_stat_hyp_time += hyp;
>> +    spin_unlock_irqrestore(&sched_stat_lock, flags);
> 
> Please don't use a lock. Just use add_sized() instead which will add
> atomically.

add_sized() is definitely not atomic. It will only prevent the compiler
from reading/writing the variable multiple times.

If we expect sched_get_time_correction to be called concurrently then we 
would need to introduce atomic64_t or a spin lock.

Cheers,
Jürgen Groß June 12, 2020, 12:41 p.m. UTC | #4
On 12.06.20 14:29, Julien Grall wrote:
> Hi Juergen,
> 
> On 12/06/2020 05:57, Jürgen Groß wrote:
>> On 12.06.20 02:22, Volodymyr Babchuk wrote:
>>> As scheduler code now collects time spent in IRQ handlers and in
>>> do_softirq(), we can present those values to userspace tools like
>>> xentop, so system administrator can see how system behaves.
>>>
>>> We are updating counters only in sched_get_time_correction() function
>>> to minimize number of taken spinlocks. As atomic_t is 32 bit wide, it
>>> is not enough to store time with nanosecond precision. So we need to
>>> use 64 bit variables and protect them with spinlock.
>>>
>>> Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@epam.com>
>>> ---
>>>   xen/common/sched/core.c     | 17 +++++++++++++++++
>>>   xen/common/sysctl.c         |  1 +
>>>   xen/include/public/sysctl.h |  4 +++-
>>>   xen/include/xen/sched.h     |  2 ++
>>>   4 files changed, 23 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
>>> index a7294ff5c3..ee6b1d9161 100644
>>> --- a/xen/common/sched/core.c
>>> +++ b/xen/common/sched/core.c
>>> @@ -95,6 +95,10 @@ static struct scheduler __read_mostly ops;
>>>   static bool scheduler_active;
>>> +static DEFINE_SPINLOCK(sched_stat_lock);
>>> +s_time_t sched_stat_irq_time;
>>> +s_time_t sched_stat_hyp_time;
>>> +
>>>   static void sched_set_affinity(
>>>       struct sched_unit *unit, const cpumask_t *hard, const cpumask_t 
>>> *soft);
>>> @@ -994,9 +998,22 @@ s_time_t sched_get_time_correction(struct 
>>> sched_unit *u)
>>>               break;
>>>       }
>>> +    spin_lock_irqsave(&sched_stat_lock, flags);
>>> +    sched_stat_irq_time += irq;
>>> +    sched_stat_hyp_time += hyp;
>>> +    spin_unlock_irqrestore(&sched_stat_lock, flags);
>>
>> Please don't use a lock. Just use add_sized() instead which will add
>> atomically.
> 
> add_sized() is definitely not atomic. It will only prevent the compiler 
> to read/write multiple time the variable.

Oh, my bad, I let myself be fooled by it being defined in atomic.h.

> 
> If we expect sched_get_time_correction to be called concurrently then we 
> would need to introduce atomic64_t or a spin lock.

Or we could use percpu variables and add the cpu values up when
fetching the values.


Juergen
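
A minimal sketch of the per-cpu variant suggested above, for illustration
only: it assumes Xen's DEFINE_PER_CPU/this_cpu/per_cpu helpers and
for_each_online_cpu(), and the names below are hypothetical, not part of the
posted patch.

static DEFINE_PER_CPU(s_time_t, sched_stat_irq_ns);
static DEFINE_PER_CPU(s_time_t, sched_stat_hyp_ns);

/* Writer side: runs on the local cpu from sched_get_time_correction(). */
static void sched_stat_account(s_time_t irq, s_time_t hyp)
{
    this_cpu(sched_stat_irq_ns) += irq;
    this_cpu(sched_stat_hyp_ns) += hyp;
}

/* Reader side: add the per-cpu values up only when they are fetched. */
void sched_get_time_stats(uint64_t *irq_time, uint64_t *hyp_time)
{
    unsigned int cpu;
    uint64_t irq = 0, hyp = 0;

    for_each_online_cpu ( cpu )
    {
        irq += per_cpu(sched_stat_irq_ns, cpu);
        hyp += per_cpu(sched_stat_hyp_ns, cpu);
    }

    *irq_time = irq;
    *hyp_time = hyp;
}

Whether the cross-cpu 64-bit reads in the fetch loop can tear on 32-bit
architectures is exactly what the rest of the thread discusses.
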
Julien Grall June 12, 2020, 12:45 p.m. UTC | #5
Hi Volodymyr,

On 12/06/2020 12:44, Volodymyr Babchuk wrote:
> 
> On Fri, 2020-06-12 at 06:57 +0200, Jürgen Groß wrote:
>> On 12.06.20 02:22, Volodymyr Babchuk wrote:
>>> As scheduler code now collects time spent in IRQ handlers and in
>>> do_softirq(), we can present those values to userspace tools like
>>> xentop, so system administrator can see how system behaves.
>>>
>>> We are updating counters only in sched_get_time_correction() function
>>> to minimize number of taken spinlocks. As atomic_t is 32 bit wide, it
>>> is not enough to store time with nanosecond precision. So we need to
>>> use 64 bit variables and protect them with spinlock.
>>>
>>> Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@epam.com>
>>> ---
>>>    xen/common/sched/core.c     | 17 +++++++++++++++++
>>>    xen/common/sysctl.c         |  1 +
>>>    xen/include/public/sysctl.h |  4 +++-
>>>    xen/include/xen/sched.h     |  2 ++
>>>    4 files changed, 23 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
>>> index a7294ff5c3..ee6b1d9161 100644
>>> --- a/xen/common/sched/core.c
>>> +++ b/xen/common/sched/core.c
>>> @@ -95,6 +95,10 @@ static struct scheduler __read_mostly ops;
>>>    
>>>    static bool scheduler_active;
>>>    
>>> +static DEFINE_SPINLOCK(sched_stat_lock);
>>> +s_time_t sched_stat_irq_time;
>>> +s_time_t sched_stat_hyp_time;
>>> +
>>>    static void sched_set_affinity(
>>>        struct sched_unit *unit, const cpumask_t *hard, const cpumask_t *soft);
>>>    
>>> @@ -994,9 +998,22 @@ s_time_t sched_get_time_correction(struct sched_unit *u)
>>>                break;
>>>        }
>>>    
>>> +    spin_lock_irqsave(&sched_stat_lock, flags);
>>> +    sched_stat_irq_time += irq;
>>> +    sched_stat_hyp_time += hyp;
>>> +    spin_unlock_irqrestore(&sched_stat_lock, flags);
>>
>> Please don't use a lock. Just use add_sized() instead which will add
>> atomically.
> 
> > Looks like arm does not support 64 bit variables.
> Julien, I believe, this is armv7 limitation? Should armv8 work with 64-
> bit atomics?

64-bit atomics can work on both Armv7 and Armv8 :). It just hasn't been 
plumbed yet.

I am happy to write a patch if you need atomic64_t or even a 64-bit 
add_sized().

Cheers,
Dario Faggioli June 12, 2020, 3:29 p.m. UTC | #6
On Fri, 2020-06-12 at 14:41 +0200, Jürgen Groß wrote:
> On 12.06.20 14:29, Julien Grall wrote:
> > On 12/06/2020 05:57, Jürgen Groß wrote:
> > > On 12.06.20 02:22, Volodymyr Babchuk wrote:
> > > > 
> > > > @@ -994,9 +998,22 @@ s_time_t sched_get_time_correction(struct 
> > > > sched_unit *u)
> > > >               break;
> > > >       }
> > > > +    spin_lock_irqsave(&sched_stat_lock, flags);
> > > > +    sched_stat_irq_time += irq;
> > > > +    sched_stat_hyp_time += hyp;
> > > > +    spin_unlock_irqrestore(&sched_stat_lock, flags);
> > > 
> > > Please don't use a lock. Just use add_sized() instead which will
> > > add
> > > atomically.
> > 
> > If we expect sched_get_time_correction to be called concurrently
> > then we 
> > would need to introduce atomic64_t or a spin lock.
> 
> Or we could use percpu variables and add the cpu values up when
> fetching the values.
> 
Yes, either percpu or atomic looks much better than locking, to me, for
this.

Regards
Volodymyr Babchuk June 12, 2020, 10:16 p.m. UTC | #7
Hi Julien,

On Fri, 2020-06-12 at 13:45 +0100, Julien Grall wrote:
> Hi Volodymyr,
> 
> On 12/06/2020 12:44, Volodymyr Babchuk wrote:
> > On Fri, 2020-06-12 at 06:57 +0200, Jürgen Groß wrote:
> > > On 12.06.20 02:22, Volodymyr Babchuk wrote:
> > > > As scheduler code now collects time spent in IRQ handlers and in
> > > > do_softirq(), we can present those values to userspace tools like
> > > > xentop, so system administrator can see how system behaves.
> > > > 
> > > > We are updating counters only in sched_get_time_correction() function
> > > > to minimize number of taken spinlocks. As atomic_t is 32 bit wide, it
> > > > is not enough to store time with nanosecond precision. So we need to
> > > > use 64 bit variables and protect them with spinlock.
> > > > 
> > > > Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@epam.com>
> > > > ---
> > > >    xen/common/sched/core.c     | 17 +++++++++++++++++
> > > >    xen/common/sysctl.c         |  1 +
> > > >    xen/include/public/sysctl.h |  4 +++-
> > > >    xen/include/xen/sched.h     |  2 ++
> > > >    4 files changed, 23 insertions(+), 1 deletion(-)
> > > > 
> > > > diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
> > > > index a7294ff5c3..ee6b1d9161 100644
> > > > --- a/xen/common/sched/core.c
> > > > +++ b/xen/common/sched/core.c
> > > > @@ -95,6 +95,10 @@ static struct scheduler __read_mostly ops;
> > > >    
> > > >    static bool scheduler_active;
> > > >    
> > > > +static DEFINE_SPINLOCK(sched_stat_lock);
> > > > +s_time_t sched_stat_irq_time;
> > > > +s_time_t sched_stat_hyp_time;
> > > > +
> > > >    static void sched_set_affinity(
> > > >        struct sched_unit *unit, const cpumask_t *hard, const cpumask_t *soft);
> > > >    
> > > > @@ -994,9 +998,22 @@ s_time_t sched_get_time_correction(struct sched_unit *u)
> > > >                break;
> > > >        }
> > > >    
> > > > +    spin_lock_irqsave(&sched_stat_lock, flags);
> > > > +    sched_stat_irq_time += irq;
> > > > +    sched_stat_hyp_time += hyp;
> > > > +    spin_unlock_irqrestore(&sched_stat_lock, flags);
> > > 
> > > Please don't use a lock. Just use add_sized() instead which will add
> > > atomically.
> > 
> > Looks like arm does not support 64 bit variables.
> > Julien, I believe, this is armv7 limitation? Should armv8 work with 64-
> > bit atomics?
> 
> 64-bit atomics can work on both Armv7 and Armv8 :). It just haven't been 
> plumbed yet.

Wow, didn't know that armv7 is capable of that.

> I am happy to write a patch if you need atomic64_t or even a 64-bit 
> add_sized().

That would be cool, certainly. But it looks like the x86 code does not
have an atomic64_t implementation either. So there would be lots of
changes just for one use case. I don't know if it is worth it.

Let's finish the discussion of the other parts of the series. If it
turns out that atomic64_t is absolutely necessary, I'll get back to you.
Thanks for the offer anyway.
Volodymyr Babchuk June 12, 2020, 10:27 p.m. UTC | #8
On Fri, 2020-06-12 at 17:29 +0200, Dario Faggioli wrote:
> On Fri, 2020-06-12 at 14:41 +0200, Jürgen Groß wrote:
> > On 12.06.20 14:29, Julien Grall wrote:
> > > On 12/06/2020 05:57, Jürgen Groß wrote:
> > > > On 12.06.20 02:22, Volodymyr Babchuk wrote:
> > > > > @@ -994,9 +998,22 @@ s_time_t sched_get_time_correction(struct 
> > > > > sched_unit *u)
> > > > >               break;
> > > > >       }
> > > > > +    spin_lock_irqsave(&sched_stat_lock, flags);
> > > > > +    sched_stat_irq_time += irq;
> > > > > +    sched_stat_hyp_time += hyp;
> > > > > +    spin_unlock_irqrestore(&sched_stat_lock, flags);
> > > > 
> > > > Please don't use a lock. Just use add_sized() instead which will
> > > > add
> > > > atomically.
> > > 
> > > If we expect sched_get_time_correction to be called concurrently
> > > then we 
> > > would need to introduce atomic64_t or a spin lock.
> > 
> > Or we could use percpu variables and add the cpu values up when
> > fetching the values.
> > 
> Yes, either percpu or atomic looks much better than locking, to me, for
> this.

Looks like we are going to have atomic64_t after all. So I'll prefer to
use atomics there.
Jürgen Groß June 13, 2020, 6:22 a.m. UTC | #9
On 13.06.20 00:27, Volodymyr Babchuk wrote:
> On Fri, 2020-06-12 at 17:29 +0200, Dario Faggioli wrote:
>> On Fri, 2020-06-12 at 14:41 +0200, Jürgen Groß wrote:
>>> On 12.06.20 14:29, Julien Grall wrote:
>>>> On 12/06/2020 05:57, Jürgen Groß wrote:
>>>>> On 12.06.20 02:22, Volodymyr Babchuk wrote:
>>>>>> @@ -994,9 +998,22 @@ s_time_t sched_get_time_correction(struct
>>>>>> sched_unit *u)
>>>>>>                break;
>>>>>>        }
>>>>>> +    spin_lock_irqsave(&sched_stat_lock, flags);
>>>>>> +    sched_stat_irq_time += irq;
>>>>>> +    sched_stat_hyp_time += hyp;
>>>>>> +    spin_unlock_irqrestore(&sched_stat_lock, flags);
>>>>>
>>>>> Please don't use a lock. Just use add_sized() instead which will
>>>>> add
>>>>> atomically.
>>>>
>>>> If we expect sched_get_time_correction to be called concurrently
>>>> then we
>>>> would need to introduce atomic64_t or a spin lock.
>>>
>>> Or we could use percpu variables and add the cpu values up when
>>> fetching the values.
>>>
>> Yes, either percpu or atomic looks much better than locking, to me, for
>> this.
> 
> Looks like we going to have atomic64_t after all. So, I'll prefer to to
> use atomics there.

Performance would be better using percpu variables, as those would avoid
the cacheline being bounced between cpus a lot.


Juergen
Volodymyr Babchuk June 18, 2020, 2:58 a.m. UTC | #10
Hi Jürgen,

Jürgen Groß writes:

> On 13.06.20 00:27, Volodymyr Babchuk wrote:
>> On Fri, 2020-06-12 at 17:29 +0200, Dario Faggioli wrote:
>>> On Fri, 2020-06-12 at 14:41 +0200, Jürgen Groß wrote:
>>>> On 12.06.20 14:29, Julien Grall wrote:
>>>>> On 12/06/2020 05:57, Jürgen Groß wrote:
>>>>>> On 12.06.20 02:22, Volodymyr Babchuk wrote:
>>>>>>> @@ -994,9 +998,22 @@ s_time_t sched_get_time_correction(struct
>>>>>>> sched_unit *u)
>>>>>>>                break;
>>>>>>>        }
>>>>>>> +    spin_lock_irqsave(&sched_stat_lock, flags);
>>>>>>> +    sched_stat_irq_time += irq;
>>>>>>> +    sched_stat_hyp_time += hyp;
>>>>>>> +    spin_unlock_irqrestore(&sched_stat_lock, flags);
>>>>>>
>>>>>> Please don't use a lock. Just use add_sized() instead which will
>>>>>> add
>>>>>> atomically.
>>>>>
>>>>> If we expect sched_get_time_correction to be called concurrently
>>>>> then we
>>>>> would need to introduce atomic64_t or a spin lock.
>>>>
>>>> Or we could use percpu variables and add the cpu values up when
>>>> fetching the values.
>>>>
>>> Yes, either percpu or atomic looks much better than locking, to me, for
>>> this.
>>
>> Looks like we going to have atomic64_t after all. So, I'll prefer to to
>> use atomics there.
>
> Performance would be better using percpu variables, as those would avoid
> the cacheline moved between cpus a lot.

I see. But don't we need locking in this case? I can see a scenario
where one pCPU updates its own counters while another pCPU is reading
them.

IIRC, Armv8 guarantees that a 64-bit read of aligned data is
consistent. "Consistent" in the sense that, for example, we would not
see the lower 32 bits of the new value and the upper 32 bits of the old
value.

I can't say for sure about ARMv7 and about x86.
Julien Grall June 18, 2020, 3:17 p.m. UTC | #11
On 18/06/2020 03:58, Volodymyr Babchuk wrote:
> 
> Hi Jürgen,
> 
> Jürgen Groß writes:
> 
>> On 13.06.20 00:27, Volodymyr Babchuk wrote:
>>> On Fri, 2020-06-12 at 17:29 +0200, Dario Faggioli wrote:
>>>> On Fri, 2020-06-12 at 14:41 +0200, Jürgen Groß wrote:
>>>>> On 12.06.20 14:29, Julien Grall wrote:
>>>>>> On 12/06/2020 05:57, Jürgen Groß wrote:
>>>>>>> On 12.06.20 02:22, Volodymyr Babchuk wrote:
>>>>>>>> @@ -994,9 +998,22 @@ s_time_t sched_get_time_correction(struct
>>>>>>>> sched_unit *u)
>>>>>>>>                 break;
>>>>>>>>         }
>>>>>>>> +    spin_lock_irqsave(&sched_stat_lock, flags);
>>>>>>>> +    sched_stat_irq_time += irq;
>>>>>>>> +    sched_stat_hyp_time += hyp;
>>>>>>>> +    spin_unlock_irqrestore(&sched_stat_lock, flags);
>>>>>>>
>>>>>>> Please don't use a lock. Just use add_sized() instead which will
>>>>>>> add
>>>>>>> atomically.
>>>>>>
>>>>>> If we expect sched_get_time_correction to be called concurrently
>>>>>> then we
>>>>>> would need to introduce atomic64_t or a spin lock.
>>>>>
>>>>> Or we could use percpu variables and add the cpu values up when
>>>>> fetching the values.
>>>>>
>>>> Yes, either percpu or atomic looks much better than locking, to me, for
>>>> this.
>>>
>>> Looks like we going to have atomic64_t after all. So, I'll prefer to to
>>> use atomics there.
>>
>> Performance would be better using percpu variables, as those would avoid
>> the cacheline moved between cpus a lot.
> 
> I see. But don't we need locking in this case? I can see scenario, when
> one pCPU updates own counters while another pCPU is reading them.
> 
> IIRC, ARMv8 guarantees that 64 bit read of aligned data would be
> consistent. "Consistent" in the sense that, for example, we would not
> see lower 32 bits of the new value and upper 32 bits of the old value.

That's right, although this is only valid so long as you use
{read,write}_atomic().

> 
> I can't say for sure about ARMv7 and about x86.
ARMv7 with LPAE support will guarantee 64-bit atomicity when using 
strd/ldrd as long as the alignment is correct. LPAE is mandatory when 
supporting HYP mode, so you can safely assume this will work.

64-bit on x86 is also guaranteed to be atomic when using write_atomic().
Jan Beulich June 18, 2020, 3:23 p.m. UTC | #12
On 18.06.2020 17:17, Julien Grall wrote:
> 
> 
> On 18/06/2020 03:58, Volodymyr Babchuk wrote:
>>
>> Hi Jürgen,
>>
>> Jürgen Groß writes:
>>
>>> On 13.06.20 00:27, Volodymyr Babchuk wrote:
>>>> On Fri, 2020-06-12 at 17:29 +0200, Dario Faggioli wrote:
>>>>> On Fri, 2020-06-12 at 14:41 +0200, Jürgen Groß wrote:
>>>>>> On 12.06.20 14:29, Julien Grall wrote:
>>>>>>> On 12/06/2020 05:57, Jürgen Groß wrote:
>>>>>>>> On 12.06.20 02:22, Volodymyr Babchuk wrote:
>>>>>>>>> @@ -994,9 +998,22 @@ s_time_t sched_get_time_correction(struct
>>>>>>>>> sched_unit *u)
>>>>>>>>>                 break;
>>>>>>>>>         }
>>>>>>>>> +    spin_lock_irqsave(&sched_stat_lock, flags);
>>>>>>>>> +    sched_stat_irq_time += irq;
>>>>>>>>> +    sched_stat_hyp_time += hyp;
>>>>>>>>> +    spin_unlock_irqrestore(&sched_stat_lock, flags);
>>>>>>>>
>>>>>>>> Please don't use a lock. Just use add_sized() instead which will
>>>>>>>> add
>>>>>>>> atomically.
>>>>>>>
>>>>>>> If we expect sched_get_time_correction to be called concurrently
>>>>>>> then we
>>>>>>> would need to introduce atomic64_t or a spin lock.
>>>>>>
>>>>>> Or we could use percpu variables and add the cpu values up when
>>>>>> fetching the values.
>>>>>>
>>>>> Yes, either percpu or atomic looks much better than locking, to me, for
>>>>> this.
>>>>
>>>> Looks like we going to have atomic64_t after all. So, I'll prefer to to
>>>> use atomics there.
>>>
>>> Performance would be better using percpu variables, as those would avoid
>>> the cacheline moved between cpus a lot.
>>
>> I see. But don't we need locking in this case? I can see scenario, when
>> one pCPU updates own counters while another pCPU is reading them.
>>
>> IIRC, ARMv8 guarantees that 64 bit read of aligned data would be
>> consistent. "Consistent" in the sense that, for example, we would not
>> see lower 32 bits of the new value and upper 32 bits of the old value.
> 
> That's right. Although this would be valid so long you use {read, 
> write}_atomic().
> 
>>
>> I can't say for sure about ARMv7 and about x86.
> ARMv7 with LPAE support will guarantee 64-bit atomicity when using 
> strd/ldrd as long as the alignment is correct. LPAE is mandatory when 
> supporting HYP mode, so you can safely assume this will work.
> 
> 64-bit on x86 is also guaranteed to be atomic when using write_atomic().

... and when again the data is suitably aligned, or at the very least
(for WB RAM) doesn't cross certain boundaries.

Jan
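
Putting the two statements above together, a hedged sketch of a lock-free
update/fetch of a per-cpu 64-bit counter; it assumes Xen's
read_atomic()/write_atomic() helpers, suitably aligned per-cpu variables, and
64-bit support for these helpers on the target architecture (which, per the
thread, still needs plumbing on Arm). The names are illustrative only.

static DEFINE_PER_CPU(s_time_t, sched_stat_irq_ns);

/* Writer, on the local cpu: publish the new value with one aligned
 * 64-bit store, so a remote reader can never observe a torn value. */
static void sched_stat_add_irq(s_time_t delta)
{
    s_time_t *p = &this_cpu(sched_stat_irq_ns);

    write_atomic(p, *p + delta);
}

/* Reader, possibly on another cpu: each per-cpu value is fetched with a
 * single aligned 64-bit load before being summed. */
static uint64_t sched_stat_sum_irq(void)
{
    unsigned int cpu;
    uint64_t sum = 0;

    for_each_online_cpu ( cpu )
        sum += read_atomic(&per_cpu(sched_stat_irq_ns, cpu));

    return sum;
}
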
Volodymyr Babchuk June 18, 2020, 8:24 p.m. UTC | #13
Hi Julien,

Julien Grall writes:

> Hi Volodymyr,
>
> On 12/06/2020 12:44, Volodymyr Babchuk wrote:
>>
>> On Fri, 2020-06-12 at 06:57 +0200, Jürgen Groß wrote:
>>> On 12.06.20 02:22, Volodymyr Babchuk wrote:
>>>> As scheduler code now collects time spent in IRQ handlers and in
>>>> do_softirq(), we can present those values to userspace tools like
>>>> xentop, so system administrator can see how system behaves.
>>>>
>>>> We are updating counters only in sched_get_time_correction() function
>>>> to minimize number of taken spinlocks. As atomic_t is 32 bit wide, it
>>>> is not enough to store time with nanosecond precision. So we need to
>>>> use 64 bit variables and protect them with spinlock.
>>>>
>>>> Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@epam.com>
>>>> ---
>>>>    xen/common/sched/core.c     | 17 +++++++++++++++++
>>>>    xen/common/sysctl.c         |  1 +
>>>>    xen/include/public/sysctl.h |  4 +++-
>>>>    xen/include/xen/sched.h     |  2 ++
>>>>    4 files changed, 23 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
>>>> index a7294ff5c3..ee6b1d9161 100644
>>>> --- a/xen/common/sched/core.c
>>>> +++ b/xen/common/sched/core.c
>>>> @@ -95,6 +95,10 @@ static struct scheduler __read_mostly ops;
>>>>       static bool scheduler_active;
>>>>    +static DEFINE_SPINLOCK(sched_stat_lock);
>>>> +s_time_t sched_stat_irq_time;
>>>> +s_time_t sched_stat_hyp_time;
>>>> +
>>>>    static void sched_set_affinity(
>>>>        struct sched_unit *unit, const cpumask_t *hard, const cpumask_t *soft);
>>>>    @@ -994,9 +998,22 @@ s_time_t sched_get_time_correction(struct
>>>> sched_unit *u)
>>>>                break;
>>>>        }
>>>>    +    spin_lock_irqsave(&sched_stat_lock, flags);
>>>> +    sched_stat_irq_time += irq;
>>>> +    sched_stat_hyp_time += hyp;
>>>> +    spin_unlock_irqrestore(&sched_stat_lock, flags);
>>>
>>> Please don't use a lock. Just use add_sized() instead which will add
>>> atomically.
>>
>> Looks like arm does not support 64 bit variables.
>> Julien, I believe, this is armv7 limitation? Should armv8 work with 64-
>> bit atomics?
>
> 64-bit atomics can work on both Armv7 and Armv8 :). It just haven't
> been plumbed yet.
>
> I am happy to write a patch if you need atomic64_t or even a 64-bit
> add_sized().

Looks like I'll need this patch. So, if you still have time, it would be
great if you could write it.
Julien Grall June 18, 2020, 8:34 p.m. UTC | #14
On Thu, 18 Jun 2020 at 21:24, Volodymyr Babchuk
<Volodymyr_Babchuk@epam.com> wrote:
>
>
> Hi Julien,
>
> Julien Grall writes:
>
> > Hi Volodymyr,
> >
> > On 12/06/2020 12:44, Volodymyr Babchuk wrote:
> >>
> >> On Fri, 2020-06-12 at 06:57 +0200, Jürgen Groß wrote:
> >>> On 12.06.20 02:22, Volodymyr Babchuk wrote:
> >>>> As scheduler code now collects time spent in IRQ handlers and in
> >>>> do_softirq(), we can present those values to userspace tools like
> >>>> xentop, so system administrator can see how system behaves.
> >>>>
> >>>> We are updating counters only in sched_get_time_correction() function
> >>>> to minimize number of taken spinlocks. As atomic_t is 32 bit wide, it
> >>>> is not enough to store time with nanosecond precision. So we need to
> >>>> use 64 bit variables and protect them with spinlock.
> >>>>
> >>>> Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@epam.com>
> >>>> ---
> >>>>    xen/common/sched/core.c     | 17 +++++++++++++++++
> >>>>    xen/common/sysctl.c         |  1 +
> >>>>    xen/include/public/sysctl.h |  4 +++-
> >>>>    xen/include/xen/sched.h     |  2 ++
> >>>>    4 files changed, 23 insertions(+), 1 deletion(-)
> >>>>
> >>>> diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
> >>>> index a7294ff5c3..ee6b1d9161 100644
> >>>> --- a/xen/common/sched/core.c
> >>>> +++ b/xen/common/sched/core.c
> >>>> @@ -95,6 +95,10 @@ static struct scheduler __read_mostly ops;
> >>>>       static bool scheduler_active;
> >>>>    +static DEFINE_SPINLOCK(sched_stat_lock);
> >>>> +s_time_t sched_stat_irq_time;
> >>>> +s_time_t sched_stat_hyp_time;
> >>>> +
> >>>>    static void sched_set_affinity(
> >>>>        struct sched_unit *unit, const cpumask_t *hard, const cpumask_t *soft);
> >>>>    @@ -994,9 +998,22 @@ s_time_t sched_get_time_correction(struct
> >>>> sched_unit *u)
> >>>>                break;
> >>>>        }
> >>>>    +    spin_lock_irqsave(&sched_stat_lock, flags);
> >>>> +    sched_stat_irq_time += irq;
> >>>> +    sched_stat_hyp_time += hyp;
> >>>> +    spin_unlock_irqrestore(&sched_stat_lock, flags);
> >>>
> >>> Please don't use a lock. Just use add_sized() instead which will add
> >>> atomically.
> >>
> >> Looks like arm does not support 64 bit variables.
> >> Julien, I believe, this is armv7 limitation? Should armv8 work with 64-
> >> bit atomics?
> >
> > 64-bit atomics can work on both Armv7 and Armv8 :). It just haven't
> > been plumbed yet.
> >
> > I am happy to write a patch if you need atomic64_t or even a 64-bit
> > add_sized().
>
> Looks like I'll need this patch. So, if you still have time, it will be
> great, if you'll write it.

I offered help for either the atomic64_t or the add_sized(). Can you
confirm which one you need?

Cheers,
Volodymyr Babchuk June 18, 2020, 11:35 p.m. UTC | #15
Hi Julien,

Julien Grall writes:

> On Thu, 18 Jun 2020 at 21:24, Volodymyr Babchuk
> <Volodymyr_Babchuk@epam.com> wrote:
>>
>>
>> Hi Julien,
>>
>> Julien Grall writes:
>>
>> > Hi Volodymyr,
>> >
>> > On 12/06/2020 12:44, Volodymyr Babchuk wrote:
>> >>
>> >> On Fri, 2020-06-12 at 06:57 +0200, Jürgen Groß wrote:
>> >>> On 12.06.20 02:22, Volodymyr Babchuk wrote:
>> >>>> As scheduler code now collects time spent in IRQ handlers and in
>> >>>> do_softirq(), we can present those values to userspace tools like
>> >>>> xentop, so system administrator can see how system behaves.
>> >>>>
>> >>>> We are updating counters only in sched_get_time_correction() function
>> >>>> to minimize number of taken spinlocks. As atomic_t is 32 bit wide, it
>> >>>> is not enough to store time with nanosecond precision. So we need to
>> >>>> use 64 bit variables and protect them with spinlock.
>> >>>>
>> >>>> Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@epam.com>
>> >>>> ---
>> >>>>    xen/common/sched/core.c     | 17 +++++++++++++++++
>> >>>>    xen/common/sysctl.c         |  1 +
>> >>>>    xen/include/public/sysctl.h |  4 +++-
>> >>>>    xen/include/xen/sched.h     |  2 ++
>> >>>>    4 files changed, 23 insertions(+), 1 deletion(-)
>> >>>>
>> >>>> diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
>> >>>> index a7294ff5c3..ee6b1d9161 100644
>> >>>> --- a/xen/common/sched/core.c
>> >>>> +++ b/xen/common/sched/core.c
>> >>>> @@ -95,6 +95,10 @@ static struct scheduler __read_mostly ops;
>> >>>>       static bool scheduler_active;
>> >>>>    +static DEFINE_SPINLOCK(sched_stat_lock);
>> >>>> +s_time_t sched_stat_irq_time;
>> >>>> +s_time_t sched_stat_hyp_time;
>> >>>> +
>> >>>>    static void sched_set_affinity(
>> >>>>        struct sched_unit *unit, const cpumask_t *hard, const cpumask_t *soft);
>> >>>>    @@ -994,9 +998,22 @@ s_time_t sched_get_time_correction(struct
>> >>>> sched_unit *u)
>> >>>>                break;
>> >>>>        }
>> >>>>    +    spin_lock_irqsave(&sched_stat_lock, flags);
>> >>>> +    sched_stat_irq_time += irq;
>> >>>> +    sched_stat_hyp_time += hyp;
>> >>>> +    spin_unlock_irqrestore(&sched_stat_lock, flags);
>> >>>
>> >>> Please don't use a lock. Just use add_sized() instead which will add
>> >>> atomically.
>> >>
>> >> Looks like arm does not support 64 bit variables.
>> >> Julien, I believe, this is armv7 limitation? Should armv8 work with 64-
>> >> bit atomics?
>> >
>> > 64-bit atomics can work on both Armv7 and Armv8 :). It just haven't
>> > been plumbed yet.
>> >
>> > I am happy to write a patch if you need atomic64_t or even a 64-bit
>> > add_sized().
>>
>> Looks like I'll need this patch. So, if you still have time, it will be
>> great, if you'll write it.
>
> I offered help for either the atomic64_t or the add_sized(). Can you
> confirm which one you need?

Yes, sorry. I had atomic64_t in mind.
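
For reference, a hedged sketch of what the counters could look like once an
atomic64_t is available; the type and its atomic64_add()/atomic64_read()
accessors are assumed here to follow the Linux naming, since the thread notes
Xen does not provide them yet.

static atomic64_t sched_stat_irq_time = ATOMIC64_INIT(0);
static atomic64_t sched_stat_hyp_time = ATOMIC64_INIT(0);

/* Called from sched_get_time_correction() in place of the spinlock. */
static void sched_stat_account(s_time_t irq, s_time_t hyp)
{
    atomic64_add(irq, &sched_stat_irq_time);
    atomic64_add(hyp, &sched_stat_hyp_time);
}

void sched_get_time_stats(uint64_t *irq_time, uint64_t *hyp_time)
{
    *irq_time = atomic64_read(&sched_stat_irq_time);
    *hyp_time = atomic64_read(&sched_stat_hyp_time);
}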

Patch

diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
index a7294ff5c3..ee6b1d9161 100644
--- a/xen/common/sched/core.c
+++ b/xen/common/sched/core.c
@@ -95,6 +95,10 @@  static struct scheduler __read_mostly ops;
 
 static bool scheduler_active;
 
+static DEFINE_SPINLOCK(sched_stat_lock);
+s_time_t sched_stat_irq_time;
+s_time_t sched_stat_hyp_time;
+
 static void sched_set_affinity(
     struct sched_unit *unit, const cpumask_t *hard, const cpumask_t *soft);
 
@@ -994,9 +998,22 @@  s_time_t sched_get_time_correction(struct sched_unit *u)
             break;
     }
 
+    spin_lock_irqsave(&sched_stat_lock, flags);
+    sched_stat_irq_time += irq;
+    sched_stat_hyp_time += hyp;
+    spin_unlock_irqrestore(&sched_stat_lock, flags);
+
     return irq + hyp;
 }
 
+void sched_get_time_stats(uint64_t *irq_time, uint64_t *hyp_time)
+{
+    unsigned long flags;
+
+    spin_lock_irqsave(&sched_stat_lock, flags);
+    *irq_time = sched_stat_irq_time;
+    *hyp_time = sched_stat_hyp_time;
+    spin_unlock_irqrestore(&sched_stat_lock, flags);
 }
 
 /*
diff --git a/xen/common/sysctl.c b/xen/common/sysctl.c
index 1c6a817476..00683bc93f 100644
--- a/xen/common/sysctl.c
+++ b/xen/common/sysctl.c
@@ -270,6 +270,7 @@  long do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t) u_sysctl)
         pi->scrub_pages = 0;
         pi->cpu_khz = cpu_khz;
         pi->max_mfn = get_upper_mfn_bound();
+        sched_get_time_stats(&pi->irq_time, &pi->hyp_time);
         arch_do_physinfo(pi);
         if ( iommu_enabled )
         {
diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
index 3a08c512e8..f320144d40 100644
--- a/xen/include/public/sysctl.h
+++ b/xen/include/public/sysctl.h
@@ -35,7 +35,7 @@ 
 #include "domctl.h"
 #include "physdev.h"
 
-#define XEN_SYSCTL_INTERFACE_VERSION 0x00000013
+#define XEN_SYSCTL_INTERFACE_VERSION 0x00000014
 
 /*
  * Read console content from Xen buffer ring.
@@ -118,6 +118,8 @@  struct xen_sysctl_physinfo {
     uint64_aligned_t scrub_pages;
     uint64_aligned_t outstanding_pages;
     uint64_aligned_t max_mfn; /* Largest possible MFN on this host */
+    uint64_aligned_t irq_time;
+    uint64_aligned_t hyp_time;
     uint32_t hw_cap[8];
 };
 
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 51dc7c4551..869d4efbd6 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -717,6 +717,8 @@  void vcpu_end_irq_handler(void);
 void vcpu_begin_hyp_task(struct vcpu *v);
 void vcpu_end_hyp_task(struct vcpu *v);
 
+void sched_get_time_stats(uint64_t *irq_time, uint64_t *hyp_time);
+
 /*
  * Force synchronisation of given VCPU's state. If it is currently descheduled,
  * this call will ensure that all its state is committed to memory and that