diff mbox

[v3] xen: add steal_clock support on x86

Message ID 1463664367-7675-1-git-send-email-jgross@suse.com (mailing list archive)
State New, archived
Headers show

Commit Message

Jürgen Groß May 19, 2016, 1:26 p.m. UTC
The pv_time_ops structure contains a function pointer for the
"steal_clock" functionality used only by KVM and Xen on ARM. Xen on x86
uses its own mechanism to account for the "stolen" time a thread wasn't
able to run due to hypervisor scheduling.

Add support for this feature in Xen's arch-independent time handling by
moving it out of the arm arch into drivers/xen, and remove the x86 Xen
hack.

Signed-off-by: Juergen Gross <jgross@suse.com>
---
V3: add #include <asm/paravirt.h> to avoid build error on arm
V2: remove the x86 do_stolen_accounting() hack
---
 arch/arm/xen/enlighten.c    | 17 ++---------------
 arch/x86/xen/time.c         | 44 ++------------------------------------------
 drivers/xen/time.c          | 20 ++++++++++++++++++++
 include/linux/kernel_stat.h |  1 -
 include/xen/xen-ops.h       |  1 +
 kernel/sched/cputime.c      | 10 ----------
 6 files changed, 25 insertions(+), 68 deletions(-)

Comments

Boris Ostrovsky May 19, 2016, 1:43 p.m. UTC | #1
On 05/19/2016 09:26 AM, Juergen Gross wrote:
> The pv_time_ops structure contains a function pointer for the
> "steal_clock" functionality used only by KVM and Xen on ARM. Xen on x86
> uses its own mechanism to account for the "stolen" time a thread wasn't
> able to run due to hypervisor scheduling.
>
> Add support in Xen arch independent time handling for this feature by
> moving it out of the arm arch into drivers/xen and remove the x86 Xen
> hack.
>
> Signed-off-by: Juergen Gross <jgross@suse.com>

Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>

I think this also needs to be acked by (or at least copied to) generic
Linux maintainers.

> ---
>  arch/arm/xen/enlighten.c    | 17 ++---------------
>  arch/x86/xen/time.c         | 44 ++------------------------------------------
>  drivers/xen/time.c          | 20 ++++++++++++++++++++
>  include/linux/kernel_stat.h |  1 -
>  include/xen/xen-ops.h       |  1 +
>  kernel/sched/cputime.c      | 10 ----------
>  6 files changed, 25 insertions(+), 68 deletions(-)

...

>  }
> diff --git a/drivers/xen/time.c b/drivers/xen/time.c
> index 7107842..2257b66 100644
> --- a/drivers/xen/time.c
> +++ b/drivers/xen/time.c
> @@ -6,6 +6,7 @@
>  #include <linux/math64.h>
>  #include <linux/gfp.h>
>  
> +#include <asm/paravirt.h>
>  #include <asm/xen/hypervisor.h>
>  #include <asm/xen/hypercall.h>
>  
> @@ -75,6 +76,15 @@ bool xen_vcpu_stolen(int vcpu)
>  	return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable;
>  }

(Unrelated to this patch.) Should this include RUNSTATE_offline as well?

-boris
Jürgen Groß May 19, 2016, 1:55 p.m. UTC | #2
On 19/05/16 15:26, Juergen Gross wrote:
> The pv_time_ops structure contains a function pointer for the
> "steal_clock" functionality used only by KVM and Xen on ARM. Xen on x86
> uses its own mechanism to account for the "stolen" time a thread wasn't
> able to run due to hypervisor scheduling.
> 
> Add support in Xen arch independent time handling for this feature by
> moving it out of the arm arch into drivers/xen and remove the x86 Xen
> hack.
> 
> Signed-off-by: Juergen Gross <jgross@suse.com>

Sorry, I forgot the scheduling maintainer. Added.


Juergen

> ---
> V3: add #include <asm/paravirt.h> to avoid build error on arm
> V2: remove the x86 do_stolen_accounting() hack
> ---
>  arch/arm/xen/enlighten.c    | 17 ++---------------
>  arch/x86/xen/time.c         | 44 ++------------------------------------------
>  drivers/xen/time.c          | 20 ++++++++++++++++++++
>  include/linux/kernel_stat.h |  1 -
>  include/xen/xen-ops.h       |  1 +
>  kernel/sched/cputime.c      | 10 ----------
>  6 files changed, 25 insertions(+), 68 deletions(-)
> 
> diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
> index 75cd734..9163b94 100644
> --- a/arch/arm/xen/enlighten.c
> +++ b/arch/arm/xen/enlighten.c
> @@ -84,19 +84,6 @@ int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,
>  }
>  EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range);
>  
> -static unsigned long long xen_stolen_accounting(int cpu)
> -{
> -	struct vcpu_runstate_info state;
> -
> -	BUG_ON(cpu != smp_processor_id());
> -
> -	xen_get_runstate_snapshot(&state);
> -
> -	WARN_ON(state.state != RUNSTATE_running);
> -
> -	return state.time[RUNSTATE_runnable] + state.time[RUNSTATE_offline];
> -}
> -
>  static void xen_read_wallclock(struct timespec64 *ts)
>  {
>  	u32 version;
> @@ -355,8 +342,8 @@ static int __init xen_guest_init(void)
>  
>  	register_cpu_notifier(&xen_cpu_notifier);
>  
> -	pv_time_ops.steal_clock = xen_stolen_accounting;
> -	static_key_slow_inc(&paravirt_steal_enabled);
> +	xen_time_setup_guest();
> +
>  	if (xen_initial_domain())
>  		pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
>  
> diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
> index a0a4e55..6be31df 100644
> --- a/arch/x86/xen/time.c
> +++ b/arch/x86/xen/time.c
> @@ -11,8 +11,6 @@
>  #include <linux/interrupt.h>
>  #include <linux/clocksource.h>
>  #include <linux/clockchips.h>
> -#include <linux/kernel_stat.h>
> -#include <linux/math64.h>
>  #include <linux/gfp.h>
>  #include <linux/slab.h>
>  #include <linux/pvclock_gtod.h>
> @@ -31,44 +29,6 @@
>  
>  /* Xen may fire a timer up to this many ns early */
>  #define TIMER_SLOP	100000
> -#define NS_PER_TICK	(1000000000LL / HZ)
> -
> -/* snapshots of runstate info */
> -static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot);
> -
> -/* unused ns of stolen time */
> -static DEFINE_PER_CPU(u64, xen_residual_stolen);
> -
> -static void do_stolen_accounting(void)
> -{
> -	struct vcpu_runstate_info state;
> -	struct vcpu_runstate_info *snap;
> -	s64 runnable, offline, stolen;
> -	cputime_t ticks;
> -
> -	xen_get_runstate_snapshot(&state);
> -
> -	WARN_ON(state.state != RUNSTATE_running);
> -
> -	snap = this_cpu_ptr(&xen_runstate_snapshot);
> -
> -	/* work out how much time the VCPU has not been runn*ing*  */
> -	runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable];
> -	offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline];
> -
> -	*snap = state;
> -
> -	/* Add the appropriate number of ticks of stolen time,
> -	   including any left-overs from last time. */
> -	stolen = runnable + offline + __this_cpu_read(xen_residual_stolen);
> -
> -	if (stolen < 0)
> -		stolen = 0;
> -
> -	ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
> -	__this_cpu_write(xen_residual_stolen, stolen);
> -	account_steal_ticks(ticks);
> -}
>  
>  /* Get the TSC speed from Xen */
>  static unsigned long xen_tsc_khz(void)
> @@ -335,8 +295,6 @@ static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
>  		ret = IRQ_HANDLED;
>  	}
>  
> -	do_stolen_accounting();
> -
>  	return ret;
>  }
>  
> @@ -431,6 +389,8 @@ static void __init xen_time_init(void)
>  	xen_setup_timer(cpu);
>  	xen_setup_cpu_clockevents();
>  
> +	xen_time_setup_guest();
> +
>  	if (xen_initial_domain())
>  		pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
>  }
> diff --git a/drivers/xen/time.c b/drivers/xen/time.c
> index 7107842..2257b66 100644
> --- a/drivers/xen/time.c
> +++ b/drivers/xen/time.c
> @@ -6,6 +6,7 @@
>  #include <linux/math64.h>
>  #include <linux/gfp.h>
>  
> +#include <asm/paravirt.h>
>  #include <asm/xen/hypervisor.h>
>  #include <asm/xen/hypercall.h>
>  
> @@ -75,6 +76,15 @@ bool xen_vcpu_stolen(int vcpu)
>  	return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable;
>  }
>  
> +static u64 xen_steal_clock(int cpu)
> +{
> +	struct vcpu_runstate_info state;
> +
> +	BUG_ON(cpu != smp_processor_id());
> +	xen_get_runstate_snapshot(&state);
> +	return state.time[RUNSTATE_runnable] + state.time[RUNSTATE_offline];
> +}
> +
>  void xen_setup_runstate_info(int cpu)
>  {
>  	struct vcpu_register_runstate_memory_area area;
> @@ -86,3 +96,13 @@ void xen_setup_runstate_info(int cpu)
>  		BUG();
>  }
>  
> +void __init xen_time_setup_guest(void)
> +{
> +	pv_time_ops.steal_clock = xen_steal_clock;
> +
> +	static_key_slow_inc(&paravirt_steal_enabled);
> +	/*
> +	 * We can't set paravirt_steal_rq_enabled as this would require the
> +	 * capability to read another cpu's runstate info.
> +	 */
> +}
> diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
> index 25a822f..44fda64 100644
> --- a/include/linux/kernel_stat.h
> +++ b/include/linux/kernel_stat.h
> @@ -92,7 +92,6 @@ static inline void account_process_tick(struct task_struct *tsk, int user)
>  extern void account_process_tick(struct task_struct *, int user);
>  #endif
>  
> -extern void account_steal_ticks(unsigned long ticks);
>  extern void account_idle_ticks(unsigned long ticks);
>  
>  #endif /* _LINUX_KERNEL_STAT_H */
> diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
> index 86abe07..5ce51c2 100644
> --- a/include/xen/xen-ops.h
> +++ b/include/xen/xen-ops.h
> @@ -21,6 +21,7 @@ void xen_resume_notifier_unregister(struct notifier_block *nb);
>  
>  bool xen_vcpu_stolen(int vcpu);
>  void xen_setup_runstate_info(int cpu);
> +void __init xen_time_setup_guest(void);
>  void xen_get_runstate_snapshot(struct vcpu_runstate_info *res);
>  
>  int xen_setup_shutdown_event(void);
> diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
> index 75f98c5..8c4c6dc 100644
> --- a/kernel/sched/cputime.c
> +++ b/kernel/sched/cputime.c
> @@ -490,16 +490,6 @@ void account_process_tick(struct task_struct *p, int user_tick)
>  }
>  
>  /*
> - * Account multiple ticks of steal time.
> - * @p: the process from which the cpu time has been stolen
> - * @ticks: number of stolen ticks
> - */
> -void account_steal_ticks(unsigned long ticks)
> -{
> -	account_steal_time(jiffies_to_cputime(ticks));
> -}
> -
> -/*
>   * Account multiple ticks of idle time.
>   * @ticks: number of stolen ticks
>   */
>
Stefano Stabellini May 19, 2016, 5:15 p.m. UTC | #3
On Thu, 19 May 2016, Juergen Gross wrote:
> The pv_time_ops structure contains a function pointer for the
> "steal_clock" functionality used only by KVM and Xen on ARM. Xen on x86
> uses its own mechanism to account for the "stolen" time a thread wasn't
> able to run due to hypervisor scheduling.
> 
> Add support in Xen arch independent time handling for this feature by
> moving it out of the arm arch into drivers/xen and remove the x86 Xen
> hack.
> 
> Signed-off-by: Juergen Gross <jgross@suse.com>
> ---
> V3: add #include <asm/paravirt.h> to avoid build error on arm
> V2: remove the x86 do_stolen_accounting() hack
> ---
>  arch/arm/xen/enlighten.c    | 17 ++---------------
>  arch/x86/xen/time.c         | 44 ++------------------------------------------
>  drivers/xen/time.c          | 20 ++++++++++++++++++++
>  include/linux/kernel_stat.h |  1 -
>  include/xen/xen-ops.h       |  1 +
>  kernel/sched/cputime.c      | 10 ----------
>  6 files changed, 25 insertions(+), 68 deletions(-)
> 
> diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
> index 75cd734..9163b94 100644
> --- a/arch/arm/xen/enlighten.c
> +++ b/arch/arm/xen/enlighten.c
> @@ -84,19 +84,6 @@ int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,
>  }
>  EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range);
>  
> -static unsigned long long xen_stolen_accounting(int cpu)
> -{
> -	struct vcpu_runstate_info state;
> -
> -	BUG_ON(cpu != smp_processor_id());
> -
> -	xen_get_runstate_snapshot(&state);
> -
> -	WARN_ON(state.state != RUNSTATE_running);
> -
> -	return state.time[RUNSTATE_runnable] + state.time[RUNSTATE_offline];
> -}
> -
>  static void xen_read_wallclock(struct timespec64 *ts)
>  {
>  	u32 version;
> @@ -355,8 +342,8 @@ static int __init xen_guest_init(void)
>  
>  	register_cpu_notifier(&xen_cpu_notifier);
>  
> -	pv_time_ops.steal_clock = xen_stolen_accounting;
> -	static_key_slow_inc(&paravirt_steal_enabled);
> +	xen_time_setup_guest();

You can remove

#include <asm/paravirt.h>

from the top of the file now, I believe



>  	if (xen_initial_domain())
>  		pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
>  
> diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
> index a0a4e55..6be31df 100644
> --- a/arch/x86/xen/time.c
> +++ b/arch/x86/xen/time.c
> @@ -11,8 +11,6 @@
>  #include <linux/interrupt.h>
>  #include <linux/clocksource.h>
>  #include <linux/clockchips.h>
> -#include <linux/kernel_stat.h>
> -#include <linux/math64.h>
>  #include <linux/gfp.h>
>  #include <linux/slab.h>
>  #include <linux/pvclock_gtod.h>
> @@ -31,44 +29,6 @@
>  
>  /* Xen may fire a timer up to this many ns early */
>  #define TIMER_SLOP	100000
> -#define NS_PER_TICK	(1000000000LL / HZ)
> -
> -/* snapshots of runstate info */
> -static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot);
> -
> -/* unused ns of stolen time */
> -static DEFINE_PER_CPU(u64, xen_residual_stolen);
> -
> -static void do_stolen_accounting(void)
> -{
> -	struct vcpu_runstate_info state;
> -	struct vcpu_runstate_info *snap;
> -	s64 runnable, offline, stolen;
> -	cputime_t ticks;
> -
> -	xen_get_runstate_snapshot(&state);
> -
> -	WARN_ON(state.state != RUNSTATE_running);
> -
> -	snap = this_cpu_ptr(&xen_runstate_snapshot);
> -
> -	/* work out how much time the VCPU has not been runn*ing*  */
> -	runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable];
> -	offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline];
> -
> -	*snap = state;
> -
> -	/* Add the appropriate number of ticks of stolen time,
> -	   including any left-overs from last time. */
> -	stolen = runnable + offline + __this_cpu_read(xen_residual_stolen);
> -
> -	if (stolen < 0)
> -		stolen = 0;
> -
> -	ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
> -	__this_cpu_write(xen_residual_stolen, stolen);
> -	account_steal_ticks(ticks);
> -}
>  
>  /* Get the TSC speed from Xen */
>  static unsigned long xen_tsc_khz(void)
> @@ -335,8 +295,6 @@ static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
>  		ret = IRQ_HANDLED;
>  	}
>  
> -	do_stolen_accounting();
> -
>  	return ret;
>  }
>  
> @@ -431,6 +389,8 @@ static void __init xen_time_init(void)
>  	xen_setup_timer(cpu);
>  	xen_setup_cpu_clockevents();
>  
> +	xen_time_setup_guest();
> +
>  	if (xen_initial_domain())
>  		pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
>  }
> diff --git a/drivers/xen/time.c b/drivers/xen/time.c
> index 7107842..2257b66 100644
> --- a/drivers/xen/time.c
> +++ b/drivers/xen/time.c
> @@ -6,6 +6,7 @@
>  #include <linux/math64.h>
>  #include <linux/gfp.h>
>  
> +#include <asm/paravirt.h>
>  #include <asm/xen/hypervisor.h>
>  #include <asm/xen/hypercall.h>
>  
> @@ -75,6 +76,15 @@ bool xen_vcpu_stolen(int vcpu)
>  	return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable;
>  }
>  
> +static u64 xen_steal_clock(int cpu)
> +{
> +	struct vcpu_runstate_info state;
> +
> +	BUG_ON(cpu != smp_processor_id());
> +	xen_get_runstate_snapshot(&state);
> +	return state.time[RUNSTATE_runnable] + state.time[RUNSTATE_offline];
> +}
> +
>  void xen_setup_runstate_info(int cpu)
>  {
>  	struct vcpu_register_runstate_memory_area area;
> @@ -86,3 +96,13 @@ void xen_setup_runstate_info(int cpu)
>  		BUG();
>  }
>  
> +void __init xen_time_setup_guest(void)
> +{
> +	pv_time_ops.steal_clock = xen_steal_clock;
> +
> +	static_key_slow_inc(&paravirt_steal_enabled);
> +	/*
> +	 * We can't set paravirt_steal_rq_enabled as this would require the
> +	 * capability to read another cpu's runstate info.
> +	 */
> +}
> diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
> index 25a822f..44fda64 100644
> --- a/include/linux/kernel_stat.h
> +++ b/include/linux/kernel_stat.h
> @@ -92,7 +92,6 @@ static inline void account_process_tick(struct task_struct *tsk, int user)
>  extern void account_process_tick(struct task_struct *, int user);
>  #endif
>  
> -extern void account_steal_ticks(unsigned long ticks);
>  extern void account_idle_ticks(unsigned long ticks);
>  
>  #endif /* _LINUX_KERNEL_STAT_H */
> diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
> index 86abe07..5ce51c2 100644
> --- a/include/xen/xen-ops.h
> +++ b/include/xen/xen-ops.h
> @@ -21,6 +21,7 @@ void xen_resume_notifier_unregister(struct notifier_block *nb);
>  
>  bool xen_vcpu_stolen(int vcpu);
>  void xen_setup_runstate_info(int cpu);
> +void __init xen_time_setup_guest(void);

please avoid __init in headers


>  void xen_get_runstate_snapshot(struct vcpu_runstate_info *res);
>  
>  int xen_setup_shutdown_event(void);
> diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
> index 75f98c5..8c4c6dc 100644
> --- a/kernel/sched/cputime.c
> +++ b/kernel/sched/cputime.c
> @@ -490,16 +490,6 @@ void account_process_tick(struct task_struct *p, int user_tick)
>  }
>  
>  /*
> - * Account multiple ticks of steal time.
> - * @p: the process from which the cpu time has been stolen
> - * @ticks: number of stolen ticks
> - */
> -void account_steal_ticks(unsigned long ticks)
> -{
> -	account_steal_time(jiffies_to_cputime(ticks));
> -}
> -
> -/*
>   * Account multiple ticks of idle time.
>   * @ticks: number of stolen ticks
>   */
> -- 
> 2.6.6
>
Stefano Stabellini May 19, 2016, 5:17 p.m. UTC | #4
On Thu, 19 May 2016, Stefano Stabellini wrote:
> On Thu, 19 May 2016, Juergen Gross wrote:
> > The pv_time_ops structure contains a function pointer for the
> > "steal_clock" functionality used only by KVM and Xen on ARM. Xen on x86
> > uses its own mechanism to account for the "stolen" time a thread wasn't
> > able to run due to hypervisor scheduling.
> > 
> > Add support in Xen arch independent time handling for this feature by
> > moving it out of the arm arch into drivers/xen and remove the x86 Xen
> > hack.
> > 
> > Signed-off-by: Juergen Gross <jgross@suse.com>
> > ---
> > V3: add #include <asm/paravirt.h> to avoid build error on arm
> > V2: remove the x86 do_stolen_accounting() hack
> > ---
> >  arch/arm/xen/enlighten.c    | 17 ++---------------
> >  arch/x86/xen/time.c         | 44 ++------------------------------------------
> >  drivers/xen/time.c          | 20 ++++++++++++++++++++
> >  include/linux/kernel_stat.h |  1 -
> >  include/xen/xen-ops.h       |  1 +
> >  kernel/sched/cputime.c      | 10 ----------
> >  6 files changed, 25 insertions(+), 68 deletions(-)
> > 
> > diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
> > index 75cd734..9163b94 100644
> > --- a/arch/arm/xen/enlighten.c
> > +++ b/arch/arm/xen/enlighten.c
> > @@ -84,19 +84,6 @@ int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,
> >  }
> >  EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range);
> >  
> > -static unsigned long long xen_stolen_accounting(int cpu)
> > -{
> > -	struct vcpu_runstate_info state;
> > -
> > -	BUG_ON(cpu != smp_processor_id());
> > -
> > -	xen_get_runstate_snapshot(&state);
> > -
> > -	WARN_ON(state.state != RUNSTATE_running);
> > -
> > -	return state.time[RUNSTATE_runnable] + state.time[RUNSTATE_offline];
> > -}
> > -
> >  static void xen_read_wallclock(struct timespec64 *ts)
> >  {
> >  	u32 version;
> > @@ -355,8 +342,8 @@ static int __init xen_guest_init(void)
> >  
> >  	register_cpu_notifier(&xen_cpu_notifier);
> >  
> > -	pv_time_ops.steal_clock = xen_stolen_accounting;
> > -	static_key_slow_inc(&paravirt_steal_enabled);
> > +	xen_time_setup_guest();
> 
> You can remove
> 
> #include <asm/paravirt.h>
> 
> from headers now I believe

Sorry for the broken English. I meant: you can remove #include
<asm/paravirt.h> from the top of the file now I believe.


> 
> >  	if (xen_initial_domain())
> >  		pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
> >  
> > diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
> > index a0a4e55..6be31df 100644
> > --- a/arch/x86/xen/time.c
> > +++ b/arch/x86/xen/time.c
> > @@ -11,8 +11,6 @@
> >  #include <linux/interrupt.h>
> >  #include <linux/clocksource.h>
> >  #include <linux/clockchips.h>
> > -#include <linux/kernel_stat.h>
> > -#include <linux/math64.h>
> >  #include <linux/gfp.h>
> >  #include <linux/slab.h>
> >  #include <linux/pvclock_gtod.h>
> > @@ -31,44 +29,6 @@
> >  
> >  /* Xen may fire a timer up to this many ns early */
> >  #define TIMER_SLOP	100000
> > -#define NS_PER_TICK	(1000000000LL / HZ)
> > -
> > -/* snapshots of runstate info */
> > -static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot);
> > -
> > -/* unused ns of stolen time */
> > -static DEFINE_PER_CPU(u64, xen_residual_stolen);
> > -
> > -static void do_stolen_accounting(void)
> > -{
> > -	struct vcpu_runstate_info state;
> > -	struct vcpu_runstate_info *snap;
> > -	s64 runnable, offline, stolen;
> > -	cputime_t ticks;
> > -
> > -	xen_get_runstate_snapshot(&state);
> > -
> > -	WARN_ON(state.state != RUNSTATE_running);
> > -
> > -	snap = this_cpu_ptr(&xen_runstate_snapshot);
> > -
> > -	/* work out how much time the VCPU has not been runn*ing*  */
> > -	runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable];
> > -	offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline];
> > -
> > -	*snap = state;
> > -
> > -	/* Add the appropriate number of ticks of stolen time,
> > -	   including any left-overs from last time. */
> > -	stolen = runnable + offline + __this_cpu_read(xen_residual_stolen);
> > -
> > -	if (stolen < 0)
> > -		stolen = 0;
> > -
> > -	ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
> > -	__this_cpu_write(xen_residual_stolen, stolen);
> > -	account_steal_ticks(ticks);
> > -}
> >  
> >  /* Get the TSC speed from Xen */
> >  static unsigned long xen_tsc_khz(void)
> > @@ -335,8 +295,6 @@ static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
> >  		ret = IRQ_HANDLED;
> >  	}
> >  
> > -	do_stolen_accounting();
> > -
> >  	return ret;
> >  }
> >  
> > @@ -431,6 +389,8 @@ static void __init xen_time_init(void)
> >  	xen_setup_timer(cpu);
> >  	xen_setup_cpu_clockevents();
> >  
> > +	xen_time_setup_guest();
> > +
> >  	if (xen_initial_domain())
> >  		pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
> >  }
> > diff --git a/drivers/xen/time.c b/drivers/xen/time.c
> > index 7107842..2257b66 100644
> > --- a/drivers/xen/time.c
> > +++ b/drivers/xen/time.c
> > @@ -6,6 +6,7 @@
> >  #include <linux/math64.h>
> >  #include <linux/gfp.h>
> >  
> > +#include <asm/paravirt.h>
> >  #include <asm/xen/hypervisor.h>
> >  #include <asm/xen/hypercall.h>
> >  
> > @@ -75,6 +76,15 @@ bool xen_vcpu_stolen(int vcpu)
> >  	return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable;
> >  }
> >  
> > +static u64 xen_steal_clock(int cpu)
> > +{
> > +	struct vcpu_runstate_info state;
> > +
> > +	BUG_ON(cpu != smp_processor_id());
> > +	xen_get_runstate_snapshot(&state);
> > +	return state.time[RUNSTATE_runnable] + state.time[RUNSTATE_offline];
> > +}
> > +
> >  void xen_setup_runstate_info(int cpu)
> >  {
> >  	struct vcpu_register_runstate_memory_area area;
> > @@ -86,3 +96,13 @@ void xen_setup_runstate_info(int cpu)
> >  		BUG();
> >  }
> >  
> > +void __init xen_time_setup_guest(void)
> > +{
> > +	pv_time_ops.steal_clock = xen_steal_clock;
> > +
> > +	static_key_slow_inc(&paravirt_steal_enabled);
> > +	/*
> > +	 * We can't set paravirt_steal_rq_enabled as this would require the
> > +	 * capability to read another cpu's runstate info.
> > +	 */
> > +}
> > diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
> > index 25a822f..44fda64 100644
> > --- a/include/linux/kernel_stat.h
> > +++ b/include/linux/kernel_stat.h
> > @@ -92,7 +92,6 @@ static inline void account_process_tick(struct task_struct *tsk, int user)
> >  extern void account_process_tick(struct task_struct *, int user);
> >  #endif
> >  
> > -extern void account_steal_ticks(unsigned long ticks);
> >  extern void account_idle_ticks(unsigned long ticks);
> >  
> >  #endif /* _LINUX_KERNEL_STAT_H */
> > diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
> > index 86abe07..5ce51c2 100644
> > --- a/include/xen/xen-ops.h
> > +++ b/include/xen/xen-ops.h
> > @@ -21,6 +21,7 @@ void xen_resume_notifier_unregister(struct notifier_block *nb);
> >  
> >  bool xen_vcpu_stolen(int vcpu);
> >  void xen_setup_runstate_info(int cpu);
> > +void __init xen_time_setup_guest(void);
> 
> please avoid __init in headers
> 
> 
> >  void xen_get_runstate_snapshot(struct vcpu_runstate_info *res);
> >  
> >  int xen_setup_shutdown_event(void);
> > diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
> > index 75f98c5..8c4c6dc 100644
> > --- a/kernel/sched/cputime.c
> > +++ b/kernel/sched/cputime.c
> > @@ -490,16 +490,6 @@ void account_process_tick(struct task_struct *p, int user_tick)
> >  }
> >  
> >  /*
> > - * Account multiple ticks of steal time.
> > - * @p: the process from which the cpu time has been stolen
> > - * @ticks: number of stolen ticks
> > - */
> > -void account_steal_ticks(unsigned long ticks)
> > -{
> > -	account_steal_time(jiffies_to_cputime(ticks));
> > -}
> > -
> > -/*
> >   * Account multiple ticks of idle time.
> >   * @ticks: number of stolen ticks
> >   */
> > -- 
> > 2.6.6
> > 
>
Jürgen Groß May 20, 2016, 7:17 a.m. UTC | #5
On 19/05/16 19:15, Stefano Stabellini wrote:
> On Thu, 19 May 2016, Juergen Gross wrote:
>> The pv_time_ops structure contains a function pointer for the
>> "steal_clock" functionality used only by KVM and Xen on ARM. Xen on x86
>> uses its own mechanism to account for the "stolen" time a thread wasn't
>> able to run due to hypervisor scheduling.
>>
>> Add support in Xen arch independent time handling for this feature by
>> moving it out of the arm arch into drivers/xen and remove the x86 Xen
>> hack.
>>
>> Signed-off-by: Juergen Gross <jgross@suse.com>
>> ---
>> V3: add #include <asm/paravirt.h> to avoid build error on arm
>> V2: remove the x86 do_stolen_accounting() hack
>> ---
>>  arch/arm/xen/enlighten.c    | 17 ++---------------
>>  arch/x86/xen/time.c         | 44 ++------------------------------------------
>>  drivers/xen/time.c          | 20 ++++++++++++++++++++
>>  include/linux/kernel_stat.h |  1 -
>>  include/xen/xen-ops.h       |  1 +
>>  kernel/sched/cputime.c      | 10 ----------
>>  6 files changed, 25 insertions(+), 68 deletions(-)
>>
>> diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
>> index 75cd734..9163b94 100644
>> --- a/arch/arm/xen/enlighten.c
>> +++ b/arch/arm/xen/enlighten.c
>> @@ -84,19 +84,6 @@ int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,
>>  }
>>  EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range);
>>  
>> -static unsigned long long xen_stolen_accounting(int cpu)
>> -{
>> -	struct vcpu_runstate_info state;
>> -
>> -	BUG_ON(cpu != smp_processor_id());
>> -
>> -	xen_get_runstate_snapshot(&state);
>> -
>> -	WARN_ON(state.state != RUNSTATE_running);
>> -
>> -	return state.time[RUNSTATE_runnable] + state.time[RUNSTATE_offline];
>> -}
>> -
>>  static void xen_read_wallclock(struct timespec64 *ts)
>>  {
>>  	u32 version;
>> @@ -355,8 +342,8 @@ static int __init xen_guest_init(void)
>>  
>>  	register_cpu_notifier(&xen_cpu_notifier);
>>  
>> -	pv_time_ops.steal_clock = xen_stolen_accounting;
>> -	static_key_slow_inc(&paravirt_steal_enabled);
>> +	xen_time_setup_guest();
> 
> You can remove
> 
> #include <asm/paravirt.h>
> 
> from headers now I believe

Correct. Done.

>> diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
>> index 25a822f..44fda64 100644
>> --- a/include/linux/kernel_stat.h
>> +++ b/include/linux/kernel_stat.h
>> @@ -92,7 +92,6 @@ static inline void account_process_tick(struct task_struct *tsk, int user)
>>  extern void account_process_tick(struct task_struct *, int user);
>>  #endif
>>  
>> -extern void account_steal_ticks(unsigned long ticks);
>>  extern void account_idle_ticks(unsigned long ticks);
>>  
>>  #endif /* _LINUX_KERNEL_STAT_H */
>> diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
>> index 86abe07..5ce51c2 100644
>> --- a/include/xen/xen-ops.h
>> +++ b/include/xen/xen-ops.h
>> @@ -21,6 +21,7 @@ void xen_resume_notifier_unregister(struct notifier_block *nb);
>>  
>>  bool xen_vcpu_stolen(int vcpu);
>>  void xen_setup_runstate_info(int cpu);
>> +void __init xen_time_setup_guest(void);
> 
> please avoid __init in headers

Okay.


Juergen
diff mbox

Patch

diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index 75cd734..9163b94 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -84,19 +84,6 @@  int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,
 }
 EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range);
 
-static unsigned long long xen_stolen_accounting(int cpu)
-{
-	struct vcpu_runstate_info state;
-
-	BUG_ON(cpu != smp_processor_id());
-
-	xen_get_runstate_snapshot(&state);
-
-	WARN_ON(state.state != RUNSTATE_running);
-
-	return state.time[RUNSTATE_runnable] + state.time[RUNSTATE_offline];
-}
-
 static void xen_read_wallclock(struct timespec64 *ts)
 {
 	u32 version;
@@ -355,8 +342,8 @@  static int __init xen_guest_init(void)
 
 	register_cpu_notifier(&xen_cpu_notifier);
 
-	pv_time_ops.steal_clock = xen_stolen_accounting;
-	static_key_slow_inc(&paravirt_steal_enabled);
+	xen_time_setup_guest();
+
 	if (xen_initial_domain())
 		pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
 
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index a0a4e55..6be31df 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -11,8 +11,6 @@ 
 #include <linux/interrupt.h>
 #include <linux/clocksource.h>
 #include <linux/clockchips.h>
-#include <linux/kernel_stat.h>
-#include <linux/math64.h>
 #include <linux/gfp.h>
 #include <linux/slab.h>
 #include <linux/pvclock_gtod.h>
@@ -31,44 +29,6 @@ 
 
 /* Xen may fire a timer up to this many ns early */
 #define TIMER_SLOP	100000
-#define NS_PER_TICK	(1000000000LL / HZ)
-
-/* snapshots of runstate info */
-static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot);
-
-/* unused ns of stolen time */
-static DEFINE_PER_CPU(u64, xen_residual_stolen);
-
-static void do_stolen_accounting(void)
-{
-	struct vcpu_runstate_info state;
-	struct vcpu_runstate_info *snap;
-	s64 runnable, offline, stolen;
-	cputime_t ticks;
-
-	xen_get_runstate_snapshot(&state);
-
-	WARN_ON(state.state != RUNSTATE_running);
-
-	snap = this_cpu_ptr(&xen_runstate_snapshot);
-
-	/* work out how much time the VCPU has not been runn*ing*  */
-	runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable];
-	offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline];
-
-	*snap = state;
-
-	/* Add the appropriate number of ticks of stolen time,
-	   including any left-overs from last time. */
-	stolen = runnable + offline + __this_cpu_read(xen_residual_stolen);
-
-	if (stolen < 0)
-		stolen = 0;
-
-	ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
-	__this_cpu_write(xen_residual_stolen, stolen);
-	account_steal_ticks(ticks);
-}
 
 /* Get the TSC speed from Xen */
 static unsigned long xen_tsc_khz(void)
@@ -335,8 +295,6 @@  static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
 		ret = IRQ_HANDLED;
 	}
 
-	do_stolen_accounting();
-
 	return ret;
 }
 
@@ -431,6 +389,8 @@  static void __init xen_time_init(void)
 	xen_setup_timer(cpu);
 	xen_setup_cpu_clockevents();
 
+	xen_time_setup_guest();
+
 	if (xen_initial_domain())
 		pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
 }
diff --git a/drivers/xen/time.c b/drivers/xen/time.c
index 7107842..2257b66 100644
--- a/drivers/xen/time.c
+++ b/drivers/xen/time.c
@@ -6,6 +6,7 @@ 
 #include <linux/math64.h>
 #include <linux/gfp.h>
 
+#include <asm/paravirt.h>
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
 
@@ -75,6 +76,15 @@  bool xen_vcpu_stolen(int vcpu)
 	return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable;
 }
 
+static u64 xen_steal_clock(int cpu)
+{
+	struct vcpu_runstate_info state;
+
+	BUG_ON(cpu != smp_processor_id());
+	xen_get_runstate_snapshot(&state);
+	return state.time[RUNSTATE_runnable] + state.time[RUNSTATE_offline];
+}
+
 void xen_setup_runstate_info(int cpu)
 {
 	struct vcpu_register_runstate_memory_area area;
@@ -86,3 +96,13 @@  void xen_setup_runstate_info(int cpu)
 		BUG();
 }
 
+void __init xen_time_setup_guest(void)
+{
+	pv_time_ops.steal_clock = xen_steal_clock;
+
+	static_key_slow_inc(&paravirt_steal_enabled);
+	/*
+	 * We can't set paravirt_steal_rq_enabled as this would require the
+	 * capability to read another cpu's runstate info.
+	 */
+}
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 25a822f..44fda64 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -92,7 +92,6 @@  static inline void account_process_tick(struct task_struct *tsk, int user)
 extern void account_process_tick(struct task_struct *, int user);
 #endif
 
-extern void account_steal_ticks(unsigned long ticks);
 extern void account_idle_ticks(unsigned long ticks);
 
 #endif /* _LINUX_KERNEL_STAT_H */
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index 86abe07..5ce51c2 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -21,6 +21,7 @@  void xen_resume_notifier_unregister(struct notifier_block *nb);
 
 bool xen_vcpu_stolen(int vcpu);
 void xen_setup_runstate_info(int cpu);
+void __init xen_time_setup_guest(void);
 void xen_get_runstate_snapshot(struct vcpu_runstate_info *res);
 
 int xen_setup_shutdown_event(void);
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 75f98c5..8c4c6dc 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -490,16 +490,6 @@  void account_process_tick(struct task_struct *p, int user_tick)
 }
 
 /*
- * Account multiple ticks of steal time.
- * @p: the process from which the cpu time has been stolen
- * @ticks: number of stolen ticks
- */
-void account_steal_ticks(unsigned long ticks)
-{
-	account_steal_time(jiffies_to_cputime(ticks));
-}
-
-/*
  * Account multiple ticks of idle time.
  * @ticks: number of stolen ticks
  */