diff mbox series

[4/9] KVM: arm64: Support stolen time reporting via shared structure

Message ID 20190802145017.42543-5-steven.price@arm.com
State New, archived
Headers show
Series arm64: Stolen time support | expand

Commit Message

Steven Price Aug. 2, 2019, 2:50 p.m. UTC
Implement the service call for configuring a shared structre between a
VCPU and the hypervisor in which the hypervisor can write the time
stolen from the VCPU's execution time by other tasks on the host.

The hypervisor allocates memory which is placed at an IPA chosen by user
space. The hypervisor then uses WRITE_ONCE() to update the shared
structre ensuring single copy atomicity of the 64-bit unsigned value
that reports stolen time in nanoseconds.

Whenever stolen time is enabled by the guest, the stolen time counter is
reset.

The stolen time itself is retrieved from the sched_info structure
maintained by the Linux scheduler code. We enable SCHEDSTATS when
selecting KVM Kconfig to ensure this value is meaningful.

Signed-off-by: Steven Price <steven.price@arm.com>
---
 arch/arm64/include/asm/kvm_host.h | 13 +++++-
 arch/arm64/kvm/Kconfig            |  1 +
 include/kvm/arm_hypercalls.h      |  1 +
 include/linux/kvm_types.h         |  2 +
 virt/kvm/arm/arm.c                | 18 ++++++++
 virt/kvm/arm/hypercalls.c         | 70 +++++++++++++++++++++++++++++++
 6 files changed, 104 insertions(+), 1 deletion(-)

Comments

Marc Zyngier Aug. 3, 2019, 11:55 a.m. UTC | #1
On Fri,  2 Aug 2019 15:50:12 +0100
Steven Price <steven.price@arm.com> wrote:

> Implement the service call for configuring a shared structre between a

structure

> VCPU and the hypervisor in which the hypervisor can write the time
> stolen from the VCPU's execution time by other tasks on the host.
> 
> The hypervisor allocates memory which is placed at an IPA chosen by user
> space. The hypervisor then uses WRITE_ONCE() to update the shared
> structre ensuring single copy atomicity of the 64-bit unsigned value

structure

> that reports stolen time in nanoseconds.
> 
> Whenever stolen time is enabled by the guest, the stolen time counter is
> reset.
> 
> The stolen time itself is retrieved from the sched_info structure
> maintained by the Linux scheduler code. We enable SCHEDSTATS when
> selecting KVM Kconfig to ensure this value is meaningful.
> 
> Signed-off-by: Steven Price <steven.price@arm.com>
> ---
>  arch/arm64/include/asm/kvm_host.h | 13 +++++-
>  arch/arm64/kvm/Kconfig            |  1 +
>  include/kvm/arm_hypercalls.h      |  1 +
>  include/linux/kvm_types.h         |  2 +
>  virt/kvm/arm/arm.c                | 18 ++++++++
>  virt/kvm/arm/hypercalls.c         | 70 +++++++++++++++++++++++++++++++
>  6 files changed, 104 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index f656169db8c3..78f270190d43 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -44,6 +44,7 @@
>  	KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
>  #define KVM_REQ_IRQ_PENDING	KVM_ARCH_REQ(1)
>  #define KVM_REQ_VCPU_RESET	KVM_ARCH_REQ(2)
> +#define KVM_REQ_RECORD_STEAL	KVM_ARCH_REQ(3)
>  
>  DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
>  
> @@ -83,6 +84,11 @@ struct kvm_arch {
>  
>  	/* Mandated version of PSCI */
>  	u32 psci_version;
> +
> +	struct kvm_arch_pvtime {
> +		void *st;

Is it really a void *? I'm sure you can use a proper type here...

> +		gpa_t st_base;
> +	} pvtime;
>  };
>  
>  #define KVM_NR_MEM_OBJS     40
> @@ -338,8 +344,13 @@ struct kvm_vcpu_arch {
>  	/* True when deferrable sysregs are loaded on the physical CPU,
>  	 * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */
>  	bool sysregs_loaded_on_cpu;
> -};
>  
> +	/* Guest PV state */
> +	struct {
> +		u64 steal;
> +		u64 last_steal;
> +	} steal;
> +};
>  /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
>  #define vcpu_sve_pffr(vcpu) ((void *)((char *)((vcpu)->arch.sve_state) + \
>  				      sve_ffr_offset((vcpu)->arch.sve_max_vl)))
> diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
> index a67121d419a2..d8b88e40d223 100644
> --- a/arch/arm64/kvm/Kconfig
> +++ b/arch/arm64/kvm/Kconfig
> @@ -39,6 +39,7 @@ config KVM
>  	select IRQ_BYPASS_MANAGER
>  	select HAVE_KVM_IRQ_BYPASS
>  	select HAVE_KVM_VCPU_RUN_PID_CHANGE
> +	select SCHEDSTATS
>  	---help---
>  	  Support hosting virtualized guest machines.
>  	  We don't support KVM with 16K page tables yet, due to the multiple
> diff --git a/include/kvm/arm_hypercalls.h b/include/kvm/arm_hypercalls.h
> index 35a5abcc4ca3..9f0710ab4292 100644
> --- a/include/kvm/arm_hypercalls.h
> +++ b/include/kvm/arm_hypercalls.h
> @@ -7,6 +7,7 @@
>  #include <asm/kvm_emulate.h>
>  
>  int kvm_hvc_call_handler(struct kvm_vcpu *vcpu);
> +int kvm_update_stolen_time(struct kvm_vcpu *vcpu);
>  
>  static inline u32 smccc_get_function(struct kvm_vcpu *vcpu)
>  {
> diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
> index bde5374ae021..1c88e69db3d9 100644
> --- a/include/linux/kvm_types.h
> +++ b/include/linux/kvm_types.h
> @@ -35,6 +35,8 @@ typedef unsigned long  gva_t;
>  typedef u64            gpa_t;
>  typedef u64            gfn_t;
>  
> +#define GPA_INVALID	(~(gpa_t)0)
> +
>  typedef unsigned long  hva_t;
>  typedef u64            hpa_t;
>  typedef u64            hfn_t;
> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
> index f645c0fbf7ec..ebd963d2580b 100644
> --- a/virt/kvm/arm/arm.c
> +++ b/virt/kvm/arm/arm.c
> @@ -40,6 +40,10 @@
>  #include <asm/kvm_coproc.h>
>  #include <asm/sections.h>
>  
> +#include <kvm/arm_hypercalls.h>
> +#include <kvm/arm_pmu.h>
> +#include <kvm/arm_psci.h>
> +
>  #ifdef REQUIRES_VIRT
>  __asm__(".arch_extension	virt");
>  #endif
> @@ -135,6 +139,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>  	kvm->arch.max_vcpus = vgic_present ?
>  				kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS;
>  
> +	kvm->arch.pvtime.st_base = GPA_INVALID;
>  	return ret;
>  out_free_stage2_pgd:
>  	kvm_free_stage2_pgd(kvm);
> @@ -371,6 +376,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>  	kvm_vcpu_load_sysregs(vcpu);
>  	kvm_arch_vcpu_load_fp(vcpu);
>  	kvm_vcpu_pmu_restore_guest(vcpu);
> +	kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu);
>  
>  	if (single_task_running())
>  		vcpu_clear_wfe_traps(vcpu);
> @@ -617,6 +623,15 @@ static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
>  	smp_rmb();
>  }
>  
> +static void vcpu_req_record_steal(struct kvm_vcpu *vcpu)
> +{
> +	int idx;
> +
> +	idx = srcu_read_lock(&vcpu->kvm->srcu);
> +	kvm_update_stolen_time(vcpu);
> +	srcu_read_unlock(&vcpu->kvm->srcu, idx);
> +}
> +
>  static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
>  {
>  	return vcpu->arch.target >= 0;
> @@ -636,6 +651,9 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu)
>  		 * that a VCPU sees new virtual interrupts.
>  		 */
>  		kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu);
> +
> +		if (kvm_check_request(KVM_REQ_RECORD_STEAL, vcpu))
> +			vcpu_req_record_steal(vcpu);
>  	}
>  }
>  
> diff --git a/virt/kvm/arm/hypercalls.c b/virt/kvm/arm/hypercalls.c
> index 2906b2df99df..196c71c8dd87 100644
> --- a/virt/kvm/arm/hypercalls.c
> +++ b/virt/kvm/arm/hypercalls.c
> @@ -10,6 +10,70 @@
>  #include <kvm/arm_hypercalls.h>
>  #include <kvm/arm_psci.h>
>  
> +
> +static struct pvclock_vcpu_stolen_time_info *pvtime_get_st(
> +		struct kvm_vcpu *vcpu)

nit: on a single line.

> +{
> +	struct pvclock_vcpu_stolen_time_info *st = vcpu->kvm->arch.pvtime.st;
> +
> +	if (!st)
> +		return NULL;
> +
> +	return &st[kvm_vcpu_get_idx(vcpu)];
> +}
> +
> +int kvm_update_stolen_time(struct kvm_vcpu *vcpu)
> +{
> +	u64 steal;
> +	struct pvclock_vcpu_stolen_time_info *kaddr;
> +
> +	if (vcpu->kvm->arch.pvtime.st_base == GPA_INVALID)
> +		return -ENOTSUPP;

So for a guest that doesn't have stolen time support (which is 100% of
them for the foreseeable future), we still set a request, take the srcu
lock and end-up here for nothing. I'd rather we test this st_base
early, as it should never change once the guest has started.

> +
> +	kaddr = pvtime_get_st(vcpu);
> +
> +	if (!kaddr)
> +		return -ENOTSUPP;

How can this happen?

> +
> +	kaddr->revision = 0;
> +	kaddr->attributes = 0;

Why does this need to be written each time we update the stolen time? I
have the feeling this would be better moved to the hypercall
initializing the data structure.

> +
> +	/* Let's do the local bookkeeping */
> +	steal = vcpu->arch.steal.steal;
> +	steal += current->sched_info.run_delay - vcpu->arch.steal.last_steal;
> +	vcpu->arch.steal.last_steal = current->sched_info.run_delay;
> +	vcpu->arch.steal.steal = steal;
> +
> +	/* Now write out the value to the shared page */
> +	WRITE_ONCE(kaddr->stolen_time, cpu_to_le64(steal));

Is there any requirement for this to be visible to another CPU than the
one this is being written from?

> +
> +	return 0;
> +}
> +
> +static int kvm_hypercall_stolen_time(struct kvm_vcpu *vcpu)
> +{
> +	u64 ret;
> +	int err;
> +
> +	/*
> +	 * Start counting stolen time from the time the guest requests
> +	 * the feature enabled.
> +	 */
> +	vcpu->arch.steal.steal = 0;
> +	vcpu->arch.steal.last_steal = current->sched_info.run_delay;
> +
> +	err = kvm_update_stolen_time(vcpu);
> +
> +	if (err)
> +		ret = SMCCC_RET_NOT_SUPPORTED;
> +	else
> +		ret = vcpu->kvm->arch.pvtime.st_base +
> +			(sizeof(struct pvclock_vcpu_stolen_time_info) *
> +			 kvm_vcpu_get_idx(vcpu));
> +
> +	smccc_set_retval(vcpu, ret, 0, 0, 0);
> +	return 1;
> +}
>  int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
>  {
>  	u32 func_id = smccc_get_function(vcpu);
> @@ -57,8 +121,14 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
>  	case ARM_SMCCC_HV_PV_FEATURES:
>  		feature = smccc_get_arg1(vcpu);
>  		switch (feature) {
> +		case ARM_SMCCC_HV_PV_FEATURES:
> +		case ARM_SMCCC_HV_PV_TIME_ST:
> +			val = SMCCC_RET_SUCCESS;
> +			break;
>  		}
>  		break;
> +	case ARM_SMCCC_HV_PV_TIME_ST:
> +		return kvm_hypercall_stolen_time(vcpu);
>  	default:
>  		return kvm_psci_call(vcpu);
>  	}


Thanks,

	M.
Marc Zyngier Aug. 3, 2019, 5:58 p.m. UTC | #2
On Fri,  2 Aug 2019 15:50:12 +0100
Steven Price <steven.price@arm.com> wrote:

> Implement the service call for configuring a shared structre between a
> VCPU and the hypervisor in which the hypervisor can write the time
> stolen from the VCPU's execution time by other tasks on the host.
> 
> The hypervisor allocates memory which is placed at an IPA chosen by user
> space. The hypervisor then uses WRITE_ONCE() to update the shared
> structre ensuring single copy atomicity of the 64-bit unsigned value
> that reports stolen time in nanoseconds.
> 
> Whenever stolen time is enabled by the guest, the stolen time counter is
> reset.
> 
> The stolen time itself is retrieved from the sched_info structure
> maintained by the Linux scheduler code. We enable SCHEDSTATS when
> selecting KVM Kconfig to ensure this value is meaningful.
> 
> Signed-off-by: Steven Price <steven.price@arm.com>
> ---
>  arch/arm64/include/asm/kvm_host.h | 13 +++++-
>  arch/arm64/kvm/Kconfig            |  1 +
>  include/kvm/arm_hypercalls.h      |  1 +
>  include/linux/kvm_types.h         |  2 +
>  virt/kvm/arm/arm.c                | 18 ++++++++
>  virt/kvm/arm/hypercalls.c         | 70 +++++++++++++++++++++++++++++++
>  6 files changed, 104 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index f656169db8c3..78f270190d43 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -44,6 +44,7 @@
>  	KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
>  #define KVM_REQ_IRQ_PENDING	KVM_ARCH_REQ(1)
>  #define KVM_REQ_VCPU_RESET	KVM_ARCH_REQ(2)
> +#define KVM_REQ_RECORD_STEAL	KVM_ARCH_REQ(3)
>  
>  DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
>  
> @@ -83,6 +84,11 @@ struct kvm_arch {
>  
>  	/* Mandated version of PSCI */
>  	u32 psci_version;
> +
> +	struct kvm_arch_pvtime {
> +		void *st;
> +		gpa_t st_base;
> +	} pvtime;
>  };
>  
>  #define KVM_NR_MEM_OBJS     40
> @@ -338,8 +344,13 @@ struct kvm_vcpu_arch {
>  	/* True when deferrable sysregs are loaded on the physical CPU,
>  	 * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */
>  	bool sysregs_loaded_on_cpu;
> -};
>  
> +	/* Guest PV state */
> +	struct {
> +		u64 steal;
> +		u64 last_steal;
> +	} steal;
> +};
>  /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
>  #define vcpu_sve_pffr(vcpu) ((void *)((char *)((vcpu)->arch.sve_state) + \
>  				      sve_ffr_offset((vcpu)->arch.sve_max_vl)))
> diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
> index a67121d419a2..d8b88e40d223 100644
> --- a/arch/arm64/kvm/Kconfig
> +++ b/arch/arm64/kvm/Kconfig
> @@ -39,6 +39,7 @@ config KVM
>  	select IRQ_BYPASS_MANAGER
>  	select HAVE_KVM_IRQ_BYPASS
>  	select HAVE_KVM_VCPU_RUN_PID_CHANGE
> +	select SCHEDSTATS
>  	---help---
>  	  Support hosting virtualized guest machines.
>  	  We don't support KVM with 16K page tables yet, due to the multiple
> diff --git a/include/kvm/arm_hypercalls.h b/include/kvm/arm_hypercalls.h
> index 35a5abcc4ca3..9f0710ab4292 100644
> --- a/include/kvm/arm_hypercalls.h
> +++ b/include/kvm/arm_hypercalls.h
> @@ -7,6 +7,7 @@
>  #include <asm/kvm_emulate.h>
>  
>  int kvm_hvc_call_handler(struct kvm_vcpu *vcpu);
> +int kvm_update_stolen_time(struct kvm_vcpu *vcpu);
>  
>  static inline u32 smccc_get_function(struct kvm_vcpu *vcpu)
>  {
> diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
> index bde5374ae021..1c88e69db3d9 100644
> --- a/include/linux/kvm_types.h
> +++ b/include/linux/kvm_types.h
> @@ -35,6 +35,8 @@ typedef unsigned long  gva_t;
>  typedef u64            gpa_t;
>  typedef u64            gfn_t;
>  
> +#define GPA_INVALID	(~(gpa_t)0)
> +
>  typedef unsigned long  hva_t;
>  typedef u64            hpa_t;
>  typedef u64            hfn_t;
> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
> index f645c0fbf7ec..ebd963d2580b 100644
> --- a/virt/kvm/arm/arm.c
> +++ b/virt/kvm/arm/arm.c
> @@ -40,6 +40,10 @@
>  #include <asm/kvm_coproc.h>
>  #include <asm/sections.h>
>  
> +#include <kvm/arm_hypercalls.h>
> +#include <kvm/arm_pmu.h>
> +#include <kvm/arm_psci.h>
> +
>  #ifdef REQUIRES_VIRT
>  __asm__(".arch_extension	virt");
>  #endif
> @@ -135,6 +139,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>  	kvm->arch.max_vcpus = vgic_present ?
>  				kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS;
>  
> +	kvm->arch.pvtime.st_base = GPA_INVALID;
>  	return ret;
>  out_free_stage2_pgd:
>  	kvm_free_stage2_pgd(kvm);
> @@ -371,6 +376,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>  	kvm_vcpu_load_sysregs(vcpu);
>  	kvm_arch_vcpu_load_fp(vcpu);
>  	kvm_vcpu_pmu_restore_guest(vcpu);
> +	kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu);
>  
>  	if (single_task_running())
>  		vcpu_clear_wfe_traps(vcpu);
> @@ -617,6 +623,15 @@ static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
>  	smp_rmb();
>  }
>  
> +static void vcpu_req_record_steal(struct kvm_vcpu *vcpu)
> +{
> +	int idx;
> +
> +	idx = srcu_read_lock(&vcpu->kvm->srcu);
> +	kvm_update_stolen_time(vcpu);
> +	srcu_read_unlock(&vcpu->kvm->srcu, idx);
> +}
> +
>  static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
>  {
>  	return vcpu->arch.target >= 0;
> @@ -636,6 +651,9 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu)
>  		 * that a VCPU sees new virtual interrupts.
>  		 */
>  		kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu);
> +
> +		if (kvm_check_request(KVM_REQ_RECORD_STEAL, vcpu))
> +			vcpu_req_record_steal(vcpu);

Something troubles me. Here, you've set the request on load. But you
can be preempted at any time (preemption gets disabled just after).

I have the feeling that should you get preempted right here, you'll
end-up having accumulated the wrong amount of steal time, as the
request put via load when you'll get scheduled back in will only get
processed after a full round of entry/exit/entry, which doesn't look
great.

Am I getting it right?

	M.
Marc Zyngier Aug. 3, 2019, 6:13 p.m. UTC | #3
On Sat, 3 Aug 2019 18:58:17 +0100
Marc Zyngier <maz@kernel.org> wrote:

> On Fri,  2 Aug 2019 15:50:12 +0100
> Steven Price <steven.price@arm.com> wrote:
> 
> > Implement the service call for configuring a shared structre between a
> > VCPU and the hypervisor in which the hypervisor can write the time
> > stolen from the VCPU's execution time by other tasks on the host.
> > 
> > The hypervisor allocates memory which is placed at an IPA chosen by user
> > space. The hypervisor then uses WRITE_ONCE() to update the shared
> > structre ensuring single copy atomicity of the 64-bit unsigned value
> > that reports stolen time in nanoseconds.
> > 
> > Whenever stolen time is enabled by the guest, the stolen time counter is
> > reset.
> > 
> > The stolen time itself is retrieved from the sched_info structure
> > maintained by the Linux scheduler code. We enable SCHEDSTATS when
> > selecting KVM Kconfig to ensure this value is meaningful.
> > 
> > Signed-off-by: Steven Price <steven.price@arm.com>
> > ---
> >  arch/arm64/include/asm/kvm_host.h | 13 +++++-
> >  arch/arm64/kvm/Kconfig            |  1 +
> >  include/kvm/arm_hypercalls.h      |  1 +
> >  include/linux/kvm_types.h         |  2 +
> >  virt/kvm/arm/arm.c                | 18 ++++++++
> >  virt/kvm/arm/hypercalls.c         | 70 +++++++++++++++++++++++++++++++
> >  6 files changed, 104 insertions(+), 1 deletion(-)
> > 
> > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> > index f656169db8c3..78f270190d43 100644
> > --- a/arch/arm64/include/asm/kvm_host.h
> > +++ b/arch/arm64/include/asm/kvm_host.h
> > @@ -44,6 +44,7 @@
> >  	KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
> >  #define KVM_REQ_IRQ_PENDING	KVM_ARCH_REQ(1)
> >  #define KVM_REQ_VCPU_RESET	KVM_ARCH_REQ(2)
> > +#define KVM_REQ_RECORD_STEAL	KVM_ARCH_REQ(3)
> >  
> >  DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
> >  
> > @@ -83,6 +84,11 @@ struct kvm_arch {
> >  
> >  	/* Mandated version of PSCI */
> >  	u32 psci_version;
> > +
> > +	struct kvm_arch_pvtime {
> > +		void *st;
> > +		gpa_t st_base;
> > +	} pvtime;
> >  };
> >  
> >  #define KVM_NR_MEM_OBJS     40
> > @@ -338,8 +344,13 @@ struct kvm_vcpu_arch {
> >  	/* True when deferrable sysregs are loaded on the physical CPU,
> >  	 * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */
> >  	bool sysregs_loaded_on_cpu;
> > -};
> >  
> > +	/* Guest PV state */
> > +	struct {
> > +		u64 steal;
> > +		u64 last_steal;
> > +	} steal;
> > +};
> >  /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
> >  #define vcpu_sve_pffr(vcpu) ((void *)((char *)((vcpu)->arch.sve_state) + \
> >  				      sve_ffr_offset((vcpu)->arch.sve_max_vl)))
> > diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
> > index a67121d419a2..d8b88e40d223 100644
> > --- a/arch/arm64/kvm/Kconfig
> > +++ b/arch/arm64/kvm/Kconfig
> > @@ -39,6 +39,7 @@ config KVM
> >  	select IRQ_BYPASS_MANAGER
> >  	select HAVE_KVM_IRQ_BYPASS
> >  	select HAVE_KVM_VCPU_RUN_PID_CHANGE
> > +	select SCHEDSTATS
> >  	---help---
> >  	  Support hosting virtualized guest machines.
> >  	  We don't support KVM with 16K page tables yet, due to the multiple
> > diff --git a/include/kvm/arm_hypercalls.h b/include/kvm/arm_hypercalls.h
> > index 35a5abcc4ca3..9f0710ab4292 100644
> > --- a/include/kvm/arm_hypercalls.h
> > +++ b/include/kvm/arm_hypercalls.h
> > @@ -7,6 +7,7 @@
> >  #include <asm/kvm_emulate.h>
> >  
> >  int kvm_hvc_call_handler(struct kvm_vcpu *vcpu);
> > +int kvm_update_stolen_time(struct kvm_vcpu *vcpu);
> >  
> >  static inline u32 smccc_get_function(struct kvm_vcpu *vcpu)
> >  {
> > diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
> > index bde5374ae021..1c88e69db3d9 100644
> > --- a/include/linux/kvm_types.h
> > +++ b/include/linux/kvm_types.h
> > @@ -35,6 +35,8 @@ typedef unsigned long  gva_t;
> >  typedef u64            gpa_t;
> >  typedef u64            gfn_t;
> >  
> > +#define GPA_INVALID	(~(gpa_t)0)
> > +
> >  typedef unsigned long  hva_t;
> >  typedef u64            hpa_t;
> >  typedef u64            hfn_t;
> > diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
> > index f645c0fbf7ec..ebd963d2580b 100644
> > --- a/virt/kvm/arm/arm.c
> > +++ b/virt/kvm/arm/arm.c
> > @@ -40,6 +40,10 @@
> >  #include <asm/kvm_coproc.h>
> >  #include <asm/sections.h>
> >  
> > +#include <kvm/arm_hypercalls.h>
> > +#include <kvm/arm_pmu.h>
> > +#include <kvm/arm_psci.h>
> > +
> >  #ifdef REQUIRES_VIRT
> >  __asm__(".arch_extension	virt");
> >  #endif
> > @@ -135,6 +139,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
> >  	kvm->arch.max_vcpus = vgic_present ?
> >  				kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS;
> >  
> > +	kvm->arch.pvtime.st_base = GPA_INVALID;
> >  	return ret;
> >  out_free_stage2_pgd:
> >  	kvm_free_stage2_pgd(kvm);
> > @@ -371,6 +376,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
> >  	kvm_vcpu_load_sysregs(vcpu);
> >  	kvm_arch_vcpu_load_fp(vcpu);
> >  	kvm_vcpu_pmu_restore_guest(vcpu);
> > +	kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu);
> >  
> >  	if (single_task_running())
> >  		vcpu_clear_wfe_traps(vcpu);
> > @@ -617,6 +623,15 @@ static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
> >  	smp_rmb();
> >  }
> >  
> > +static void vcpu_req_record_steal(struct kvm_vcpu *vcpu)
> > +{
> > +	int idx;
> > +
> > +	idx = srcu_read_lock(&vcpu->kvm->srcu);
> > +	kvm_update_stolen_time(vcpu);
> > +	srcu_read_unlock(&vcpu->kvm->srcu, idx);
> > +}
> > +
> >  static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
> >  {
> >  	return vcpu->arch.target >= 0;
> > @@ -636,6 +651,9 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu)
> >  		 * that a VCPU sees new virtual interrupts.
> >  		 */
> >  		kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu);
> > +
> > +		if (kvm_check_request(KVM_REQ_RECORD_STEAL, vcpu))
> > +			vcpu_req_record_steal(vcpu);  
> 
> Something troubles me. Here, you've set the request on load. But you
> can be preempted at any time (preemption gets disabled just after).
> 
> I have the feeling that should you get preempted right here, you'll
> end-up having accumulated the wrong amount of steal time, as the
> request put via load when you'll get scheduled back in will only get
> processed after a full round of entry/exit/entry, which doesn't look
> great.

Ah, no. We're saved by the check for pending requests right before we
jump in the guest, causing an early exit and the whole shebang to be
restarted.

	M.
Steven Price Aug. 5, 2019, 2:09 p.m. UTC | #4
On 03/08/2019 12:55, Marc Zyngier wrote:
> On Fri,  2 Aug 2019 15:50:12 +0100
> Steven Price <steven.price@arm.com> wrote:
> 
>> Implement the service call for configuring a shared structre between a
> 
> structure
> 
>> VCPU and the hypervisor in which the hypervisor can write the time
>> stolen from the VCPU's execution time by other tasks on the host.
>>
>> The hypervisor allocates memory which is placed at an IPA chosen by user
>> space. The hypervisor then uses WRITE_ONCE() to update the shared
>> structre ensuring single copy atomicity of the 64-bit unsigned value
> 
> structure

Twice in one commit message... thanks for spotting! :)

>> that reports stolen time in nanoseconds.
>>
>> Whenever stolen time is enabled by the guest, the stolen time counter is
>> reset.
>>
>> The stolen time itself is retrieved from the sched_info structure
>> maintained by the Linux scheduler code. We enable SCHEDSTATS when
>> selecting KVM Kconfig to ensure this value is meaningful.
>>
>> Signed-off-by: Steven Price <steven.price@arm.com>
>> ---
>>  arch/arm64/include/asm/kvm_host.h | 13 +++++-
>>  arch/arm64/kvm/Kconfig            |  1 +
>>  include/kvm/arm_hypercalls.h      |  1 +
>>  include/linux/kvm_types.h         |  2 +
>>  virt/kvm/arm/arm.c                | 18 ++++++++
>>  virt/kvm/arm/hypercalls.c         | 70 +++++++++++++++++++++++++++++++
>>  6 files changed, 104 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
>> index f656169db8c3..78f270190d43 100644
>> --- a/arch/arm64/include/asm/kvm_host.h
>> +++ b/arch/arm64/include/asm/kvm_host.h
>> @@ -44,6 +44,7 @@
>>  	KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
>>  #define KVM_REQ_IRQ_PENDING	KVM_ARCH_REQ(1)
>>  #define KVM_REQ_VCPU_RESET	KVM_ARCH_REQ(2)
>> +#define KVM_REQ_RECORD_STEAL	KVM_ARCH_REQ(3)
>>  
>>  DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
>>  
>> @@ -83,6 +84,11 @@ struct kvm_arch {
>>  
>>  	/* Mandated version of PSCI */
>>  	u32 psci_version;
>> +
>> +	struct kvm_arch_pvtime {
>> +		void *st;
> 
> Is it really a void *? I'm sure you can use a proper type here...

Indeed that sounds like a good idea!

>> +		gpa_t st_base;
>> +	} pvtime;
>>  };
>>  
>>  #define KVM_NR_MEM_OBJS     40
>> @@ -338,8 +344,13 @@ struct kvm_vcpu_arch {
>>  	/* True when deferrable sysregs are loaded on the physical CPU,
>>  	 * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */
>>  	bool sysregs_loaded_on_cpu;
>> -};
>>  
>> +	/* Guest PV state */
>> +	struct {
>> +		u64 steal;
>> +		u64 last_steal;
>> +	} steal;
>> +};
>>  /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
>>  #define vcpu_sve_pffr(vcpu) ((void *)((char *)((vcpu)->arch.sve_state) + \
>>  				      sve_ffr_offset((vcpu)->arch.sve_max_vl)))
>> diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
>> index a67121d419a2..d8b88e40d223 100644
>> --- a/arch/arm64/kvm/Kconfig
>> +++ b/arch/arm64/kvm/Kconfig
>> @@ -39,6 +39,7 @@ config KVM
>>  	select IRQ_BYPASS_MANAGER
>>  	select HAVE_KVM_IRQ_BYPASS
>>  	select HAVE_KVM_VCPU_RUN_PID_CHANGE
>> +	select SCHEDSTATS
>>  	---help---
>>  	  Support hosting virtualized guest machines.
>>  	  We don't support KVM with 16K page tables yet, due to the multiple
>> diff --git a/include/kvm/arm_hypercalls.h b/include/kvm/arm_hypercalls.h
>> index 35a5abcc4ca3..9f0710ab4292 100644
>> --- a/include/kvm/arm_hypercalls.h
>> +++ b/include/kvm/arm_hypercalls.h
>> @@ -7,6 +7,7 @@
>>  #include <asm/kvm_emulate.h>
>>  
>>  int kvm_hvc_call_handler(struct kvm_vcpu *vcpu);
>> +int kvm_update_stolen_time(struct kvm_vcpu *vcpu);
>>  
>>  static inline u32 smccc_get_function(struct kvm_vcpu *vcpu)
>>  {
>> diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
>> index bde5374ae021..1c88e69db3d9 100644
>> --- a/include/linux/kvm_types.h
>> +++ b/include/linux/kvm_types.h
>> @@ -35,6 +35,8 @@ typedef unsigned long  gva_t;
>>  typedef u64            gpa_t;
>>  typedef u64            gfn_t;
>>  
>> +#define GPA_INVALID	(~(gpa_t)0)
>> +
>>  typedef unsigned long  hva_t;
>>  typedef u64            hpa_t;
>>  typedef u64            hfn_t;
>> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
>> index f645c0fbf7ec..ebd963d2580b 100644
>> --- a/virt/kvm/arm/arm.c
>> +++ b/virt/kvm/arm/arm.c
>> @@ -40,6 +40,10 @@
>>  #include <asm/kvm_coproc.h>
>>  #include <asm/sections.h>
>>  
>> +#include <kvm/arm_hypercalls.h>
>> +#include <kvm/arm_pmu.h>
>> +#include <kvm/arm_psci.h>
>> +
>>  #ifdef REQUIRES_VIRT
>>  __asm__(".arch_extension	virt");
>>  #endif
>> @@ -135,6 +139,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>>  	kvm->arch.max_vcpus = vgic_present ?
>>  				kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS;
>>  
>> +	kvm->arch.pvtime.st_base = GPA_INVALID;
>>  	return ret;
>>  out_free_stage2_pgd:
>>  	kvm_free_stage2_pgd(kvm);
>> @@ -371,6 +376,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>>  	kvm_vcpu_load_sysregs(vcpu);
>>  	kvm_arch_vcpu_load_fp(vcpu);
>>  	kvm_vcpu_pmu_restore_guest(vcpu);
>> +	kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu);
>>  
>>  	if (single_task_running())
>>  		vcpu_clear_wfe_traps(vcpu);
>> @@ -617,6 +623,15 @@ static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
>>  	smp_rmb();
>>  }
>>  
>> +static void vcpu_req_record_steal(struct kvm_vcpu *vcpu)
>> +{
>> +	int idx;
>> +
>> +	idx = srcu_read_lock(&vcpu->kvm->srcu);
>> +	kvm_update_stolen_time(vcpu);
>> +	srcu_read_unlock(&vcpu->kvm->srcu, idx);
>> +}
>> +
>>  static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
>>  {
>>  	return vcpu->arch.target >= 0;
>> @@ -636,6 +651,9 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu)
>>  		 * that a VCPU sees new virtual interrupts.
>>  		 */
>>  		kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu);
>> +
>> +		if (kvm_check_request(KVM_REQ_RECORD_STEAL, vcpu))
>> +			vcpu_req_record_steal(vcpu);
>>  	}
>>  }
>>  
>> diff --git a/virt/kvm/arm/hypercalls.c b/virt/kvm/arm/hypercalls.c
>> index 2906b2df99df..196c71c8dd87 100644
>> --- a/virt/kvm/arm/hypercalls.c
>> +++ b/virt/kvm/arm/hypercalls.c
>> @@ -10,6 +10,70 @@
>>  #include <kvm/arm_hypercalls.h>
>>  #include <kvm/arm_psci.h>
>>  
>> +
>> +static struct pvclock_vcpu_stolen_time_info *pvtime_get_st(
>> +		struct kvm_vcpu *vcpu)
> 
> nit: on a single line.
> 
>> +{
>> +	struct pvclock_vcpu_stolen_time_info *st = vcpu->kvm->arch.pvtime.st;
>> +
>> +	if (!st)
>> +		return NULL;
>> +
>> +	return &st[kvm_vcpu_get_idx(vcpu)];
>> +}
>> +
>> +int kvm_update_stolen_time(struct kvm_vcpu *vcpu)
>> +{
>> +	u64 steal;
>> +	struct pvclock_vcpu_stolen_time_info *kaddr;
>> +
>> +	if (vcpu->kvm->arch.pvtime.st_base == GPA_INVALID)
>> +		return -ENOTSUPP;
> 
> So for a guest that doesn't have stolen time support (which is 100% of
> them for the foreseeable future), we still set a request, take the srcu
> lock and end-up here for nothing. I'd rather we test this st_base
> early, as it should never change once the guest has started.

Good point - I'll make the call to kvm_make_request() conditional on
st_base being set.

>> +
>> +	kaddr = pvtime_get_st(vcpu);
>> +
>> +	if (!kaddr)
>> +		return -ENOTSUPP;
> 
> How can this happen?

Good question, and it makes the pvtime_get_st() helper rather pointless.
Will remove.

>> +
>> +	kaddr->revision = 0;
>> +	kaddr->attributes = 0;
> 
> Why does this need to be written each time we update the stolen time? I
> have the feeling this would be better moved to the hypercall
> initializing the data structure.

Sure.

>> +
>> +	/* Let's do the local bookkeeping */
>> +	steal = vcpu->arch.steal.steal;
>> +	steal += current->sched_info.run_delay - vcpu->arch.steal.last_steal;
>> +	vcpu->arch.steal.last_steal = current->sched_info.run_delay;
>> +	vcpu->arch.steal.steal = steal;
>> +
>> +	/* Now write out the value to the shared page */
>> +	WRITE_ONCE(kaddr->stolen_time, cpu_to_le64(steal));
> 
> Is there any requirement for this to be visible to another CPU than the
> one this is being written from?

I don't believe there is a requirement for this to be visible
synchronously - there is an implicit race here if another vCPU is
accessing the structure (because the hypervisor can preempt and update
at any point), so we don't need to push this out with a barrier. However
we must use 64-bit single-copy atomicity writes to ensure that another
vCPU can't see a half-updated value.

Thanks,

Steve
Steven Price Aug. 5, 2019, 2:18 p.m. UTC | #5
On 03/08/2019 19:13, Marc Zyngier wrote:
> On Sat, 3 Aug 2019 18:58:17 +0100
> Marc Zyngier <maz@kernel.org> wrote:
> 
>> On Fri,  2 Aug 2019 15:50:12 +0100
>> Steven Price <steven.price@arm.com> wrote:
>>
>>> Implement the service call for configuring a shared structre between a
>>> VCPU and the hypervisor in which the hypervisor can write the time
>>> stolen from the VCPU's execution time by other tasks on the host.
>>>
>>> The hypervisor allocates memory which is placed at an IPA chosen by user
>>> space. The hypervisor then uses WRITE_ONCE() to update the shared
>>> structre ensuring single copy atomicity of the 64-bit unsigned value
>>> that reports stolen time in nanoseconds.
>>>
>>> Whenever stolen time is enabled by the guest, the stolen time counter is
>>> reset.
>>>
>>> The stolen time itself is retrieved from the sched_info structure
>>> maintained by the Linux scheduler code. We enable SCHEDSTATS when
>>> selecting KVM Kconfig to ensure this value is meaningful.
>>>
>>> Signed-off-by: Steven Price <steven.price@arm.com>
>>> ---
>>>  arch/arm64/include/asm/kvm_host.h | 13 +++++-
>>>  arch/arm64/kvm/Kconfig            |  1 +
>>>  include/kvm/arm_hypercalls.h      |  1 +
>>>  include/linux/kvm_types.h         |  2 +
>>>  virt/kvm/arm/arm.c                | 18 ++++++++
>>>  virt/kvm/arm/hypercalls.c         | 70 +++++++++++++++++++++++++++++++
>>>  6 files changed, 104 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
>>> index f656169db8c3..78f270190d43 100644
>>> --- a/arch/arm64/include/asm/kvm_host.h
>>> +++ b/arch/arm64/include/asm/kvm_host.h
>>> @@ -44,6 +44,7 @@
>>>  	KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
>>>  #define KVM_REQ_IRQ_PENDING	KVM_ARCH_REQ(1)
>>>  #define KVM_REQ_VCPU_RESET	KVM_ARCH_REQ(2)
>>> +#define KVM_REQ_RECORD_STEAL	KVM_ARCH_REQ(3)
>>>  
>>>  DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
>>>  
>>> @@ -83,6 +84,11 @@ struct kvm_arch {
>>>  
>>>  	/* Mandated version of PSCI */
>>>  	u32 psci_version;
>>> +
>>> +	struct kvm_arch_pvtime {
>>> +		void *st;
>>> +		gpa_t st_base;
>>> +	} pvtime;
>>>  };
>>>  
>>>  #define KVM_NR_MEM_OBJS     40
>>> @@ -338,8 +344,13 @@ struct kvm_vcpu_arch {
>>>  	/* True when deferrable sysregs are loaded on the physical CPU,
>>>  	 * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */
>>>  	bool sysregs_loaded_on_cpu;
>>> -};
>>>  
>>> +	/* Guest PV state */
>>> +	struct {
>>> +		u64 steal;
>>> +		u64 last_steal;
>>> +	} steal;
>>> +};
>>>  /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
>>>  #define vcpu_sve_pffr(vcpu) ((void *)((char *)((vcpu)->arch.sve_state) + \
>>>  				      sve_ffr_offset((vcpu)->arch.sve_max_vl)))
>>> diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
>>> index a67121d419a2..d8b88e40d223 100644
>>> --- a/arch/arm64/kvm/Kconfig
>>> +++ b/arch/arm64/kvm/Kconfig
>>> @@ -39,6 +39,7 @@ config KVM
>>>  	select IRQ_BYPASS_MANAGER
>>>  	select HAVE_KVM_IRQ_BYPASS
>>>  	select HAVE_KVM_VCPU_RUN_PID_CHANGE
>>> +	select SCHEDSTATS
>>>  	---help---
>>>  	  Support hosting virtualized guest machines.
>>>  	  We don't support KVM with 16K page tables yet, due to the multiple
>>> diff --git a/include/kvm/arm_hypercalls.h b/include/kvm/arm_hypercalls.h
>>> index 35a5abcc4ca3..9f0710ab4292 100644
>>> --- a/include/kvm/arm_hypercalls.h
>>> +++ b/include/kvm/arm_hypercalls.h
>>> @@ -7,6 +7,7 @@
>>>  #include <asm/kvm_emulate.h>
>>>  
>>>  int kvm_hvc_call_handler(struct kvm_vcpu *vcpu);
>>> +int kvm_update_stolen_time(struct kvm_vcpu *vcpu);
>>>  
>>>  static inline u32 smccc_get_function(struct kvm_vcpu *vcpu)
>>>  {
>>> diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
>>> index bde5374ae021..1c88e69db3d9 100644
>>> --- a/include/linux/kvm_types.h
>>> +++ b/include/linux/kvm_types.h
>>> @@ -35,6 +35,8 @@ typedef unsigned long  gva_t;
>>>  typedef u64            gpa_t;
>>>  typedef u64            gfn_t;
>>>  
>>> +#define GPA_INVALID	(~(gpa_t)0)
>>> +
>>>  typedef unsigned long  hva_t;
>>>  typedef u64            hpa_t;
>>>  typedef u64            hfn_t;
>>> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
>>> index f645c0fbf7ec..ebd963d2580b 100644
>>> --- a/virt/kvm/arm/arm.c
>>> +++ b/virt/kvm/arm/arm.c
>>> @@ -40,6 +40,10 @@
>>>  #include <asm/kvm_coproc.h>
>>>  #include <asm/sections.h>
>>>  
>>> +#include <kvm/arm_hypercalls.h>
>>> +#include <kvm/arm_pmu.h>
>>> +#include <kvm/arm_psci.h>
>>> +
>>>  #ifdef REQUIRES_VIRT
>>>  __asm__(".arch_extension	virt");
>>>  #endif
>>> @@ -135,6 +139,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>>>  	kvm->arch.max_vcpus = vgic_present ?
>>>  				kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS;
>>>  
>>> +	kvm->arch.pvtime.st_base = GPA_INVALID;
>>>  	return ret;
>>>  out_free_stage2_pgd:
>>>  	kvm_free_stage2_pgd(kvm);
>>> @@ -371,6 +376,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>>>  	kvm_vcpu_load_sysregs(vcpu);
>>>  	kvm_arch_vcpu_load_fp(vcpu);
>>>  	kvm_vcpu_pmu_restore_guest(vcpu);
>>> +	kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu);
>>>  
>>>  	if (single_task_running())
>>>  		vcpu_clear_wfe_traps(vcpu);
>>> @@ -617,6 +623,15 @@ static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
>>>  	smp_rmb();
>>>  }
>>>  
>>> +static void vcpu_req_record_steal(struct kvm_vcpu *vcpu)
>>> +{
>>> +	int idx;
>>> +
>>> +	idx = srcu_read_lock(&vcpu->kvm->srcu);
>>> +	kvm_update_stolen_time(vcpu);
>>> +	srcu_read_unlock(&vcpu->kvm->srcu, idx);
>>> +}
>>> +
>>>  static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
>>>  {
>>>  	return vcpu->arch.target >= 0;
>>> @@ -636,6 +651,9 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu)
>>>  		 * that a VCPU sees new virtual interrupts.
>>>  		 */
>>>  		kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu);
>>> +
>>> +		if (kvm_check_request(KVM_REQ_RECORD_STEAL, vcpu))
>>> +			vcpu_req_record_steal(vcpu);  
>>
>> Something troubles me. Here, you've set the request on load. But you
>> can be preempted at any time (preemption gets disabled just after).
>>
>> I have the feeling that should you get preempted right here, you'll
>> end-up having accumulated the wrong amount of steal time, as the
>> request put via load when you'll get scheduled back in will only get
>> processed after a full round of entry/exit/entry, which doesn't look
>> great.
> 
> Ah, no. We're saved by the check for pending requests right before we
> jump in the guest, causing an early exit and the whole shebang to be
> restarted.

Yes, that's my understanding. Obviously not ideal if it happens in that
small window, but everything is redone to get the right values in the end.

Steve
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index f656169db8c3..78f270190d43 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -44,6 +44,7 @@ 
 	KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_IRQ_PENDING	KVM_ARCH_REQ(1)
 #define KVM_REQ_VCPU_RESET	KVM_ARCH_REQ(2)
+#define KVM_REQ_RECORD_STEAL	KVM_ARCH_REQ(3)
 
 DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
 
@@ -83,6 +84,11 @@  struct kvm_arch {
 
 	/* Mandated version of PSCI */
 	u32 psci_version;
+
+	struct kvm_arch_pvtime {
+		void *st;
+		gpa_t st_base;
+	} pvtime;
 };
 
 #define KVM_NR_MEM_OBJS     40
@@ -338,8 +344,13 @@  struct kvm_vcpu_arch {
 	/* True when deferrable sysregs are loaded on the physical CPU,
 	 * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */
 	bool sysregs_loaded_on_cpu;
-};
 
+	/* Guest PV state */
+	struct {
+		u64 steal;
+		u64 last_steal;
+	} steal;
+};
 /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
 #define vcpu_sve_pffr(vcpu) ((void *)((char *)((vcpu)->arch.sve_state) + \
 				      sve_ffr_offset((vcpu)->arch.sve_max_vl)))
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index a67121d419a2..d8b88e40d223 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -39,6 +39,7 @@  config KVM
 	select IRQ_BYPASS_MANAGER
 	select HAVE_KVM_IRQ_BYPASS
 	select HAVE_KVM_VCPU_RUN_PID_CHANGE
+	select SCHEDSTATS
 	---help---
 	  Support hosting virtualized guest machines.
 	  We don't support KVM with 16K page tables yet, due to the multiple
diff --git a/include/kvm/arm_hypercalls.h b/include/kvm/arm_hypercalls.h
index 35a5abcc4ca3..9f0710ab4292 100644
--- a/include/kvm/arm_hypercalls.h
+++ b/include/kvm/arm_hypercalls.h
@@ -7,6 +7,7 @@ 
 #include <asm/kvm_emulate.h>
 
 int kvm_hvc_call_handler(struct kvm_vcpu *vcpu);
+int kvm_update_stolen_time(struct kvm_vcpu *vcpu);
 
 static inline u32 smccc_get_function(struct kvm_vcpu *vcpu)
 {
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index bde5374ae021..1c88e69db3d9 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -35,6 +35,8 @@  typedef unsigned long  gva_t;
 typedef u64            gpa_t;
 typedef u64            gfn_t;
 
+#define GPA_INVALID	(~(gpa_t)0)
+
 typedef unsigned long  hva_t;
 typedef u64            hpa_t;
 typedef u64            hfn_t;
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index f645c0fbf7ec..ebd963d2580b 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -40,6 +40,10 @@ 
 #include <asm/kvm_coproc.h>
 #include <asm/sections.h>
 
+#include <kvm/arm_hypercalls.h>
+#include <kvm/arm_pmu.h>
+#include <kvm/arm_psci.h>
+
 #ifdef REQUIRES_VIRT
 __asm__(".arch_extension	virt");
 #endif
@@ -135,6 +139,7 @@  int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	kvm->arch.max_vcpus = vgic_present ?
 				kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS;
 
+	kvm->arch.pvtime.st_base = GPA_INVALID;
 	return ret;
 out_free_stage2_pgd:
 	kvm_free_stage2_pgd(kvm);
@@ -371,6 +376,7 @@  void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	kvm_vcpu_load_sysregs(vcpu);
 	kvm_arch_vcpu_load_fp(vcpu);
 	kvm_vcpu_pmu_restore_guest(vcpu);
+	kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu);
 
 	if (single_task_running())
 		vcpu_clear_wfe_traps(vcpu);
@@ -617,6 +623,15 @@  static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
 	smp_rmb();
 }
 
+static void vcpu_req_record_steal(struct kvm_vcpu *vcpu)
+{
+	int idx;
+
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
+	kvm_update_stolen_time(vcpu);
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
+}
+
 static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.target >= 0;
@@ -636,6 +651,9 @@  static void check_vcpu_requests(struct kvm_vcpu *vcpu)
 		 * that a VCPU sees new virtual interrupts.
 		 */
 		kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu);
+
+		if (kvm_check_request(KVM_REQ_RECORD_STEAL, vcpu))
+			vcpu_req_record_steal(vcpu);
 	}
 }
 
diff --git a/virt/kvm/arm/hypercalls.c b/virt/kvm/arm/hypercalls.c
index 2906b2df99df..196c71c8dd87 100644
--- a/virt/kvm/arm/hypercalls.c
+++ b/virt/kvm/arm/hypercalls.c
@@ -10,6 +10,70 @@ 
 #include <kvm/arm_hypercalls.h>
 #include <kvm/arm_psci.h>
 
+
+static struct pvclock_vcpu_stolen_time_info *pvtime_get_st(
+		struct kvm_vcpu *vcpu)
+{
+	struct pvclock_vcpu_stolen_time_info *st = vcpu->kvm->arch.pvtime.st;
+
+	if (!st)
+		return NULL;
+
+	return &st[kvm_vcpu_get_idx(vcpu)];
+}
+
+int kvm_update_stolen_time(struct kvm_vcpu *vcpu)
+{
+	u64 steal;
+	struct pvclock_vcpu_stolen_time_info *kaddr;
+
+	if (vcpu->kvm->arch.pvtime.st_base == GPA_INVALID)
+		return -ENOTSUPP;
+
+	kaddr = pvtime_get_st(vcpu);
+
+	if (!kaddr)
+		return -ENOTSUPP;
+
+	kaddr->revision = 0;
+	kaddr->attributes = 0;
+
+	/* Let's do the local bookkeeping */
+	steal = vcpu->arch.steal.steal;
+	steal += current->sched_info.run_delay - vcpu->arch.steal.last_steal;
+	vcpu->arch.steal.last_steal = current->sched_info.run_delay;
+	vcpu->arch.steal.steal = steal;
+
+	/* Now write out the value to the shared page */
+	WRITE_ONCE(kaddr->stolen_time, cpu_to_le64(steal));
+
+	return 0;
+}
+
+static int kvm_hypercall_stolen_time(struct kvm_vcpu *vcpu)
+{
+	u64 ret;
+	int err;
+
+	/*
+	 * Start counting stolen time from the time the guest requests
+	 * the feature enabled.
+	 */
+	vcpu->arch.steal.steal = 0;
+	vcpu->arch.steal.last_steal = current->sched_info.run_delay;
+
+	err = kvm_update_stolen_time(vcpu);
+
+	if (err)
+		ret = SMCCC_RET_NOT_SUPPORTED;
+	else
+		ret = vcpu->kvm->arch.pvtime.st_base +
+			(sizeof(struct pvclock_vcpu_stolen_time_info) *
+			 kvm_vcpu_get_idx(vcpu));
+
+	smccc_set_retval(vcpu, ret, 0, 0, 0);
+	return 1;
+}
 int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
 {
 	u32 func_id = smccc_get_function(vcpu);
@@ -57,8 +121,14 @@  int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
 	case ARM_SMCCC_HV_PV_FEATURES:
 		feature = smccc_get_arg1(vcpu);
 		switch (feature) {
+		case ARM_SMCCC_HV_PV_FEATURES:
+		case ARM_SMCCC_HV_PV_TIME_ST:
+			val = SMCCC_RET_SUCCESS;
+			break;
 		}
 		break;
+	case ARM_SMCCC_HV_PV_TIME_ST:
+		return kvm_hypercall_stolen_time(vcpu);
 	default:
 		return kvm_psci_call(vcpu);
 	}