diff mbox series

[v3,05/10] KVM: arm64: Support stolen time reporting via shared structure

Message ID 20190821153656.33429-6-steven.price@arm.com (mailing list archive)
State New, archived
Headers show
Series arm64: Stolen time support | expand

Commit Message

Steven Price Aug. 21, 2019, 3:36 p.m. UTC
Implement the service call for configuring a shared structure between a
VCPU and the hypervisor in which the hypervisor can write the time
stolen from the VCPU's execution time by other tasks on the host.

The hypervisor allocates memory which is placed at an IPA chosen by user
space. The hypervisor then updates the shared structure using
kvm_put_guest() to ensure single copy atomicity of the 64-bit value
reporting the stolen time in nanoseconds.

Whenever stolen time is enabled by the guest, the stolen time counter is
reset.

The stolen time itself is retrieved from the sched_info structure
maintained by the Linux scheduler code. We enable SCHEDSTATS when
selecting KVM Kconfig to ensure this value is meaningful.

Signed-off-by: Steven Price <steven.price@arm.com>
---
 arch/arm/include/asm/kvm_host.h   | 20 +++++++++
 arch/arm64/include/asm/kvm_host.h | 25 +++++++++++-
 arch/arm64/kvm/Kconfig            |  1 +
 include/linux/kvm_types.h         |  2 +
 virt/kvm/arm/arm.c                | 10 +++++
 virt/kvm/arm/hypercalls.c         |  3 ++
 virt/kvm/arm/pvtime.c             | 67 +++++++++++++++++++++++++++++++
 7 files changed, 127 insertions(+), 1 deletion(-)

Comments

Jonathan Cameron Aug. 22, 2019, 10:39 a.m. UTC | #1
On Wed, 21 Aug 2019 16:36:51 +0100
Steven Price <steven.price@arm.com> wrote:

> Implement the service call for configuring a shared structure between a
> VCPU and the hypervisor in which the hypervisor can write the time
> stolen from the VCPU's execution time by other tasks on the host.
> 
> The hypervisor allocates memory which is placed at an IPA chosen by user
> space. The hypervisor then updates the shared structure using
> kvm_put_guest() to ensure single copy atomicity of the 64-bit value
> reporting the stolen time in nanoseconds.
> 
> Whenever stolen time is enabled by the guest, the stolen time counter is
> reset.
> 
> The stolen time itself is retrieved from the sched_info structure
> maintained by the Linux scheduler code. We enable SCHEDSTATS when
> selecting KVM Kconfig to ensure this value is meaningful.
> 
> Signed-off-by: Steven Price <steven.price@arm.com>

One totally trivial comment inline... Feel free to ignore :)

> ---
>  arch/arm/include/asm/kvm_host.h   | 20 +++++++++
>  arch/arm64/include/asm/kvm_host.h | 25 +++++++++++-
>  arch/arm64/kvm/Kconfig            |  1 +
>  include/linux/kvm_types.h         |  2 +
>  virt/kvm/arm/arm.c                | 10 +++++
>  virt/kvm/arm/hypercalls.c         |  3 ++
>  virt/kvm/arm/pvtime.c             | 67 +++++++++++++++++++++++++++++++
>  7 files changed, 127 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
> index 369b5d2d54bf..47d2ced99421 100644
> --- a/arch/arm/include/asm/kvm_host.h
> +++ b/arch/arm/include/asm/kvm_host.h
> @@ -39,6 +39,7 @@
>  	KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
>  #define KVM_REQ_IRQ_PENDING	KVM_ARCH_REQ(1)
>  #define KVM_REQ_VCPU_RESET	KVM_ARCH_REQ(2)
> +#define KVM_REQ_RECORD_STEAL	KVM_ARCH_REQ(3)
>  
>  DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
>  
> @@ -329,6 +330,25 @@ static inline int kvm_hypercall_pv_features(struct kvm_vcpu *vcpu)
>  	return SMCCC_RET_NOT_SUPPORTED;
>  }
>  
> +static inline int kvm_hypercall_stolen_time(struct kvm_vcpu *vcpu)
> +{
> +	return SMCCC_RET_NOT_SUPPORTED;
> +}
> +
> +static inline int kvm_update_stolen_time(struct kvm_vcpu *vcpu, bool init)
> +{
> +	return -ENOTSUPP;
> +}
> +
> +static inline void kvm_pvtime_init_vm(struct kvm_arch *kvm_arch)
> +{
> +}
> +
> +static inline bool kvm_is_pvtime_enabled(struct kvm_arch *kvm_arch)
> +{
> +	return false;
> +}
> +
>  void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
>  
>  struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 583b3639062a..b6fa7beffd8a 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -44,6 +44,7 @@
>  	KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
>  #define KVM_REQ_IRQ_PENDING	KVM_ARCH_REQ(1)
>  #define KVM_REQ_VCPU_RESET	KVM_ARCH_REQ(2)
> +#define KVM_REQ_RECORD_STEAL	KVM_ARCH_REQ(3)
>  
>  DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
>  
> @@ -83,6 +84,11 @@ struct kvm_arch {
>  
>  	/* Mandated version of PSCI */
>  	u32 psci_version;
> +
> +	struct kvm_arch_pvtime {
> +		gpa_t st_base;
> +		u64 st_size;
> +	} pvtime;
>  };
>  
>  #define KVM_NR_MEM_OBJS     40
> @@ -338,8 +344,13 @@ struct kvm_vcpu_arch {
>  	/* True when deferrable sysregs are loaded on the physical CPU,
>  	 * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */
>  	bool sysregs_loaded_on_cpu;
> -};
>  
> +	/* Guest PV state */
> +	struct {
> +		u64 steal;
> +		u64 last_steal;
> +	} steal;
> +};
>  /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
>  #define vcpu_sve_pffr(vcpu) ((void *)((char *)((vcpu)->arch.sve_state) + \
>  				      sve_ffr_offset((vcpu)->arch.sve_max_vl)))
> @@ -479,6 +490,18 @@ int kvm_perf_init(void);
>  int kvm_perf_teardown(void);
>  
>  int kvm_hypercall_pv_features(struct kvm_vcpu *vcpu);
> +int kvm_hypercall_stolen_time(struct kvm_vcpu *vcpu);
> +int kvm_update_stolen_time(struct kvm_vcpu *vcpu, bool init);
> +
> +static inline void kvm_pvtime_init_vm(struct kvm_arch *kvm_arch)
> +{
> +	kvm_arch->pvtime.st_base = GPA_INVALID;
> +}
> +
> +static inline bool kvm_is_pvtime_enabled(struct kvm_arch *kvm_arch)
> +{
> +	return (kvm_arch->pvtime.st_base != GPA_INVALID);
> +}
>  
>  void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome);
>  
> diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
> index a67121d419a2..d8b88e40d223 100644
> --- a/arch/arm64/kvm/Kconfig
> +++ b/arch/arm64/kvm/Kconfig
> @@ -39,6 +39,7 @@ config KVM
>  	select IRQ_BYPASS_MANAGER
>  	select HAVE_KVM_IRQ_BYPASS
>  	select HAVE_KVM_VCPU_RUN_PID_CHANGE
> +	select SCHEDSTATS
>  	---help---
>  	  Support hosting virtualized guest machines.
>  	  We don't support KVM with 16K page tables yet, due to the multiple
> diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
> index bde5374ae021..1c88e69db3d9 100644
> --- a/include/linux/kvm_types.h
> +++ b/include/linux/kvm_types.h
> @@ -35,6 +35,8 @@ typedef unsigned long  gva_t;
>  typedef u64            gpa_t;
>  typedef u64            gfn_t;
>  
> +#define GPA_INVALID	(~(gpa_t)0)
> +
>  typedef unsigned long  hva_t;
>  typedef u64            hpa_t;
>  typedef u64            hfn_t;
> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
> index 35a069815baf..5e8343e2dd62 100644
> --- a/virt/kvm/arm/arm.c
> +++ b/virt/kvm/arm/arm.c
> @@ -40,6 +40,10 @@
>  #include <asm/kvm_coproc.h>
>  #include <asm/sections.h>
>  
> +#include <kvm/arm_hypercalls.h>
> +#include <kvm/arm_pmu.h>
> +#include <kvm/arm_psci.h>
> +
>  #ifdef REQUIRES_VIRT
>  __asm__(".arch_extension	virt");
>  #endif
> @@ -135,6 +139,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>  	kvm->arch.max_vcpus = vgic_present ?
>  				kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS;
>  
> +	kvm_pvtime_init_vm(&kvm->arch);
>  	return ret;
>  out_free_stage2_pgd:
>  	kvm_free_stage2_pgd(kvm);
> @@ -379,6 +384,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>  	kvm_vcpu_load_sysregs(vcpu);
>  	kvm_arch_vcpu_load_fp(vcpu);
>  	kvm_vcpu_pmu_restore_guest(vcpu);
> +	if (kvm_is_pvtime_enabled(&vcpu->kvm->arch))
> +		kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu);
>  
>  	if (single_task_running())
>  		vcpu_clear_wfe_traps(vcpu);
> @@ -644,6 +651,9 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu)
>  		 * that a VCPU sees new virtual interrupts.
>  		 */
>  		kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu);
> +
> +		if (kvm_check_request(KVM_REQ_RECORD_STEAL, vcpu))
> +			kvm_update_stolen_time(vcpu, false);
>  	}
>  }
>  
> diff --git a/virt/kvm/arm/hypercalls.c b/virt/kvm/arm/hypercalls.c
> index 63ae629c466a..ac678eabf15f 100644
> --- a/virt/kvm/arm/hypercalls.c
> +++ b/virt/kvm/arm/hypercalls.c
> @@ -56,6 +56,9 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
>  	case ARM_SMCCC_HV_PV_FEATURES:
>  		val = kvm_hypercall_pv_features(vcpu);
>  		break;
> +	case ARM_SMCCC_HV_PV_TIME_ST:
> +		val = kvm_hypercall_stolen_time(vcpu);
> +		break;
>  	default:
>  		return kvm_psci_call(vcpu);
>  	}
> diff --git a/virt/kvm/arm/pvtime.c b/virt/kvm/arm/pvtime.c
> index 6201d71cb1f8..28603689f6e0 100644
> --- a/virt/kvm/arm/pvtime.c
> +++ b/virt/kvm/arm/pvtime.c
> @@ -3,8 +3,51 @@
>  
>  #include <linux/arm-smccc.h>
>  
> +#include <asm/pvclock-abi.h>
> +
>  #include <kvm/arm_hypercalls.h>
>  
> +int kvm_update_stolen_time(struct kvm_vcpu *vcpu, bool init)
> +{
> +	struct kvm *kvm = vcpu->kvm;
> +	struct kvm_arch_pvtime *pvtime = &kvm->arch.pvtime;
> +	u64 steal;
> +	u64 steal_le;
> +	u64 offset;
> +	int idx;
> +	const int stride = sizeof(struct pvclock_vcpu_stolen_time);
> +
> +	if (pvtime->st_base == GPA_INVALID)
> +		return -ENOTSUPP;
> +
> +	/* Let's do the local bookkeeping */
> +	steal = vcpu->arch.steal.steal;
> +	steal += current->sched_info.run_delay - vcpu->arch.steal.last_steal;
> +	vcpu->arch.steal.last_steal = current->sched_info.run_delay;
> +	vcpu->arch.steal.steal = steal;
> +
> +	offset = stride * kvm_vcpu_get_idx(vcpu);
> +
> +	if (unlikely(offset + stride > pvtime->st_size))
> +		return -EINVAL;
> +
> +	steal_le = cpu_to_le64(steal);
> +	idx = srcu_read_lock(&kvm->srcu);
> +	if (init) {
> +		struct pvclock_vcpu_stolen_time init_values = {
> +			.revision = 0,
> +			.attributes = 0
> +		};
> +		kvm_write_guest(kvm, pvtime->st_base + offset, &init_values,
> +				sizeof(init_values));
> +	}
> +	offset += offsetof(struct pvclock_vcpu_stolen_time, stolen_time);
> +	kvm_put_guest(kvm, pvtime->st_base + offset, steal_le, u64);
> +	srcu_read_unlock(&kvm->srcu, idx);
> +
> +	return 0;
> +}
> +
>  int kvm_hypercall_pv_features(struct kvm_vcpu *vcpu)
>  {
>  	u32 feature = smccc_get_arg1(vcpu);
> @@ -12,6 +55,7 @@ int kvm_hypercall_pv_features(struct kvm_vcpu *vcpu)
>  
>  	switch (feature) {
>  	case ARM_SMCCC_HV_PV_FEATURES:
> +	case ARM_SMCCC_HV_PV_TIME_ST:
>  		val = SMCCC_RET_SUCCESS;
>  		break;
>  	}
> @@ -19,3 +63,26 @@ int kvm_hypercall_pv_features(struct kvm_vcpu *vcpu)
>  	return val;
>  }
>  
> +int kvm_hypercall_stolen_time(struct kvm_vcpu *vcpu)
> +{
> +	u64 ret;
> +	int err;
> +
> +	/*
> +	 * Start counting stolen time from the time the guest requests
> +	 * the feature enabled.
> +	 */
> +	vcpu->arch.steal.steal = 0;
> +	vcpu->arch.steal.last_steal = current->sched_info.run_delay;
> +
> +	err = kvm_update_stolen_time(vcpu, true);
> +
> +	if (err)
> +		ret = SMCCC_RET_NOT_SUPPORTED;

Trivial by why not
		return SMCCC_RET_NOT_SUPPORTED;

	return vcpu->kvm->arch.pvtime.st_base +
...
Drops the indentation a bit and puts the error handling out of
line which is slightly nicer to read (to my eyes).

> +	else
> +		ret = vcpu->kvm->arch.pvtime.st_base +
> +			(sizeof(struct pvclock_vcpu_stolen_time) *
> +			 kvm_vcpu_get_idx(vcpu));
> +
> +	return ret;
> +}
Steven Price Aug. 22, 2019, 11 a.m. UTC | #2
On 22/08/2019 11:39, Jonathan Cameron wrote:
> On Wed, 21 Aug 2019 16:36:51 +0100
> Steven Price <steven.price@arm.com> wrote:
> 
>> Implement the service call for configuring a shared structure between a
>> VCPU and the hypervisor in which the hypervisor can write the time
>> stolen from the VCPU's execution time by other tasks on the host.
>>
>> The hypervisor allocates memory which is placed at an IPA chosen by user
>> space. The hypervisor then updates the shared structure using
>> kvm_put_guest() to ensure single copy atomicity of the 64-bit value
>> reporting the stolen time in nanoseconds.
>>
>> Whenever stolen time is enabled by the guest, the stolen time counter is
>> reset.
>>
>> The stolen time itself is retrieved from the sched_info structure
>> maintained by the Linux scheduler code. We enable SCHEDSTATS when
>> selecting KVM Kconfig to ensure this value is meaningful.
>>
>> Signed-off-by: Steven Price <steven.price@arm.com>
> 
> One totally trivial comment inline... Feel free to ignore :)
> 
[...]
>> +int kvm_hypercall_stolen_time(struct kvm_vcpu *vcpu)
>> +{
>> +	u64 ret;
>> +	int err;
>> +
>> +	/*
>> +	 * Start counting stolen time from the time the guest requests
>> +	 * the feature enabled.
>> +	 */
>> +	vcpu->arch.steal.steal = 0;
>> +	vcpu->arch.steal.last_steal = current->sched_info.run_delay;
>> +
>> +	err = kvm_update_stolen_time(vcpu, true);
>> +
>> +	if (err)
>> +		ret = SMCCC_RET_NOT_SUPPORTED;
> 
> Trivial by why not
> 		return SMCCC_RET_NOT_SUPPORTED;
> 
> 	return vcpu->kvm->arch.pvtime.st_base +
> ...
> Drops the indentation a bit and puts the error handling out of
> line which is slightly nicer to read (to my eyes).

Yes that's a nice change - drops the extra "ret" variable too.

Thanks,

Steve
Zenghui Yu Aug. 23, 2019, 12:07 p.m. UTC | #3
Hi Steven,

Only one comment, at the bottom.

On 2019/8/21 23:36, Steven Price wrote:
> Implement the service call for configuring a shared structure between a
> VCPU and the hypervisor in which the hypervisor can write the time
> stolen from the VCPU's execution time by other tasks on the host.
> 
> The hypervisor allocates memory which is placed at an IPA chosen by user
> space. The hypervisor then updates the shared structure using
> kvm_put_guest() to ensure single copy atomicity of the 64-bit value
> reporting the stolen time in nanoseconds.
> 
> Whenever stolen time is enabled by the guest, the stolen time counter is
> reset.
> 
> The stolen time itself is retrieved from the sched_info structure
> maintained by the Linux scheduler code. We enable SCHEDSTATS when
> selecting KVM Kconfig to ensure this value is meaningful.
> 
> Signed-off-by: Steven Price <steven.price@arm.com>
> ---
>   arch/arm/include/asm/kvm_host.h   | 20 +++++++++
>   arch/arm64/include/asm/kvm_host.h | 25 +++++++++++-
>   arch/arm64/kvm/Kconfig            |  1 +
>   include/linux/kvm_types.h         |  2 +
>   virt/kvm/arm/arm.c                | 10 +++++
>   virt/kvm/arm/hypercalls.c         |  3 ++
>   virt/kvm/arm/pvtime.c             | 67 +++++++++++++++++++++++++++++++
>   7 files changed, 127 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
> index 369b5d2d54bf..47d2ced99421 100644
> --- a/arch/arm/include/asm/kvm_host.h
> +++ b/arch/arm/include/asm/kvm_host.h
> @@ -39,6 +39,7 @@
>   	KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
>   #define KVM_REQ_IRQ_PENDING	KVM_ARCH_REQ(1)
>   #define KVM_REQ_VCPU_RESET	KVM_ARCH_REQ(2)
> +#define KVM_REQ_RECORD_STEAL	KVM_ARCH_REQ(3)
>   
>   DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
>   
> @@ -329,6 +330,25 @@ static inline int kvm_hypercall_pv_features(struct kvm_vcpu *vcpu)
>   	return SMCCC_RET_NOT_SUPPORTED;
>   }
>   
> +static inline int kvm_hypercall_stolen_time(struct kvm_vcpu *vcpu)
> +{
> +	return SMCCC_RET_NOT_SUPPORTED;
> +}
> +
> +static inline int kvm_update_stolen_time(struct kvm_vcpu *vcpu, bool init)
> +{
> +	return -ENOTSUPP;
> +}
> +
> +static inline void kvm_pvtime_init_vm(struct kvm_arch *kvm_arch)
> +{
> +}
> +
> +static inline bool kvm_is_pvtime_enabled(struct kvm_arch *kvm_arch)
> +{
> +	return false;
> +}
> +
>   void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
>   
>   struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 583b3639062a..b6fa7beffd8a 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -44,6 +44,7 @@
>   	KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
>   #define KVM_REQ_IRQ_PENDING	KVM_ARCH_REQ(1)
>   #define KVM_REQ_VCPU_RESET	KVM_ARCH_REQ(2)
> +#define KVM_REQ_RECORD_STEAL	KVM_ARCH_REQ(3)
>   
>   DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
>   
> @@ -83,6 +84,11 @@ struct kvm_arch {
>   
>   	/* Mandated version of PSCI */
>   	u32 psci_version;
> +
> +	struct kvm_arch_pvtime {
> +		gpa_t st_base;
> +		u64 st_size;
> +	} pvtime;
>   };
>   
>   #define KVM_NR_MEM_OBJS     40
> @@ -338,8 +344,13 @@ struct kvm_vcpu_arch {
>   	/* True when deferrable sysregs are loaded on the physical CPU,
>   	 * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */
>   	bool sysregs_loaded_on_cpu;
> -};
>   
> +	/* Guest PV state */
> +	struct {
> +		u64 steal;
> +		u64 last_steal;
> +	} steal;
> +};
>   /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
>   #define vcpu_sve_pffr(vcpu) ((void *)((char *)((vcpu)->arch.sve_state) + \
>   				      sve_ffr_offset((vcpu)->arch.sve_max_vl)))
> @@ -479,6 +490,18 @@ int kvm_perf_init(void);
>   int kvm_perf_teardown(void);
>   
>   int kvm_hypercall_pv_features(struct kvm_vcpu *vcpu);
> +int kvm_hypercall_stolen_time(struct kvm_vcpu *vcpu);
> +int kvm_update_stolen_time(struct kvm_vcpu *vcpu, bool init);
> +
> +static inline void kvm_pvtime_init_vm(struct kvm_arch *kvm_arch)
> +{
> +	kvm_arch->pvtime.st_base = GPA_INVALID;
> +}
> +
> +static inline bool kvm_is_pvtime_enabled(struct kvm_arch *kvm_arch)
> +{
> +	return (kvm_arch->pvtime.st_base != GPA_INVALID);
> +}
>   
>   void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome);
>   
> diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
> index a67121d419a2..d8b88e40d223 100644
> --- a/arch/arm64/kvm/Kconfig
> +++ b/arch/arm64/kvm/Kconfig
> @@ -39,6 +39,7 @@ config KVM
>   	select IRQ_BYPASS_MANAGER
>   	select HAVE_KVM_IRQ_BYPASS
>   	select HAVE_KVM_VCPU_RUN_PID_CHANGE
> +	select SCHEDSTATS
>   	---help---
>   	  Support hosting virtualized guest machines.
>   	  We don't support KVM with 16K page tables yet, due to the multiple
> diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
> index bde5374ae021..1c88e69db3d9 100644
> --- a/include/linux/kvm_types.h
> +++ b/include/linux/kvm_types.h
> @@ -35,6 +35,8 @@ typedef unsigned long  gva_t;
>   typedef u64            gpa_t;
>   typedef u64            gfn_t;
>   
> +#define GPA_INVALID	(~(gpa_t)0)
> +
>   typedef unsigned long  hva_t;
>   typedef u64            hpa_t;
>   typedef u64            hfn_t;
> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
> index 35a069815baf..5e8343e2dd62 100644
> --- a/virt/kvm/arm/arm.c
> +++ b/virt/kvm/arm/arm.c
> @@ -40,6 +40,10 @@
>   #include <asm/kvm_coproc.h>
>   #include <asm/sections.h>
>   
> +#include <kvm/arm_hypercalls.h>
> +#include <kvm/arm_pmu.h>
> +#include <kvm/arm_psci.h>
> +
>   #ifdef REQUIRES_VIRT
>   __asm__(".arch_extension	virt");
>   #endif
> @@ -135,6 +139,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>   	kvm->arch.max_vcpus = vgic_present ?
>   				kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS;
>   
> +	kvm_pvtime_init_vm(&kvm->arch);
>   	return ret;
>   out_free_stage2_pgd:
>   	kvm_free_stage2_pgd(kvm);
> @@ -379,6 +384,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>   	kvm_vcpu_load_sysregs(vcpu);
>   	kvm_arch_vcpu_load_fp(vcpu);
>   	kvm_vcpu_pmu_restore_guest(vcpu);
> +	if (kvm_is_pvtime_enabled(&vcpu->kvm->arch))
> +		kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu);
>   
>   	if (single_task_running())
>   		vcpu_clear_wfe_traps(vcpu);
> @@ -644,6 +651,9 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu)
>   		 * that a VCPU sees new virtual interrupts.
>   		 */
>   		kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu);
> +
> +		if (kvm_check_request(KVM_REQ_RECORD_STEAL, vcpu))
> +			kvm_update_stolen_time(vcpu, false);
>   	}
>   }
>   
> diff --git a/virt/kvm/arm/hypercalls.c b/virt/kvm/arm/hypercalls.c
> index 63ae629c466a..ac678eabf15f 100644
> --- a/virt/kvm/arm/hypercalls.c
> +++ b/virt/kvm/arm/hypercalls.c
> @@ -56,6 +56,9 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
>   	case ARM_SMCCC_HV_PV_FEATURES:
>   		val = kvm_hypercall_pv_features(vcpu);
>   		break;
> +	case ARM_SMCCC_HV_PV_TIME_ST:
> +		val = kvm_hypercall_stolen_time(vcpu);
> +		break;
>   	default:
>   		return kvm_psci_call(vcpu);
>   	}
> diff --git a/virt/kvm/arm/pvtime.c b/virt/kvm/arm/pvtime.c
> index 6201d71cb1f8..28603689f6e0 100644
> --- a/virt/kvm/arm/pvtime.c
> +++ b/virt/kvm/arm/pvtime.c
> @@ -3,8 +3,51 @@
>   
>   #include <linux/arm-smccc.h>
>   
> +#include <asm/pvclock-abi.h>
> +
>   #include <kvm/arm_hypercalls.h>
>   
> +int kvm_update_stolen_time(struct kvm_vcpu *vcpu, bool init)
> +{
> +	struct kvm *kvm = vcpu->kvm;
> +	struct kvm_arch_pvtime *pvtime = &kvm->arch.pvtime;
> +	u64 steal;
> +	u64 steal_le;
> +	u64 offset;
> +	int idx;
> +	const int stride = sizeof(struct pvclock_vcpu_stolen_time);
> +
> +	if (pvtime->st_base == GPA_INVALID)
> +		return -ENOTSUPP;
> +
> +	/* Let's do the local bookkeeping */
> +	steal = vcpu->arch.steal.steal;
> +	steal += current->sched_info.run_delay - vcpu->arch.steal.last_steal;
> +	vcpu->arch.steal.last_steal = current->sched_info.run_delay;
> +	vcpu->arch.steal.steal = steal;
> +
> +	offset = stride * kvm_vcpu_get_idx(vcpu);
> +
> +	if (unlikely(offset + stride > pvtime->st_size))
> +		return -EINVAL;
> +
> +	steal_le = cpu_to_le64(steal);
> +	idx = srcu_read_lock(&kvm->srcu);
> +	if (init) {
> +		struct pvclock_vcpu_stolen_time init_values = {
> +			.revision = 0,
> +			.attributes = 0
> +		};
> +		kvm_write_guest(kvm, pvtime->st_base + offset, &init_values,
> +				sizeof(init_values));
> +	}
> +	offset += offsetof(struct pvclock_vcpu_stolen_time, stolen_time);
> +	kvm_put_guest(kvm, pvtime->st_base + offset, steal_le, u64);
> +	srcu_read_unlock(&kvm->srcu, idx);
> +
> +	return 0;
> +}
> +
>   int kvm_hypercall_pv_features(struct kvm_vcpu *vcpu)
>   {
>   	u32 feature = smccc_get_arg1(vcpu);
> @@ -12,6 +55,7 @@ int kvm_hypercall_pv_features(struct kvm_vcpu *vcpu)
>   
>   	switch (feature) {
>   	case ARM_SMCCC_HV_PV_FEATURES:
> +	case ARM_SMCCC_HV_PV_TIME_ST:
>   		val = SMCCC_RET_SUCCESS;
>   		break;
>   	}
> @@ -19,3 +63,26 @@ int kvm_hypercall_pv_features(struct kvm_vcpu *vcpu)
>   	return val;
>   }
>   
> +int kvm_hypercall_stolen_time(struct kvm_vcpu *vcpu)
> +{
> +	u64 ret;
> +	int err;
> +
> +	/*
> +	 * Start counting stolen time from the time the guest requests
> +	 * the feature enabled.
> +	 */
> +	vcpu->arch.steal.steal = 0;
> +	vcpu->arch.steal.last_steal = current->sched_info.run_delay;
> +
> +	err = kvm_update_stolen_time(vcpu, true);
> +
> +	if (err)
> +		ret = SMCCC_RET_NOT_SUPPORTED;
> +	else
> +		ret = vcpu->kvm->arch.pvtime.st_base +
> +			(sizeof(struct pvclock_vcpu_stolen_time) *
> +			 kvm_vcpu_get_idx(vcpu));
> +
> +	return ret;

The *type* of the 'ret' here looks a bit messy to me:
(1)u64 -> (2)int -> (3)u32 -> (4)unsigned long

(1)->(2): just inside kvm_hypercall_stolen_time()
(2)->(3): inside kvm_hvc_call_handler(), assign 'ret' to 'val'
(3)->(4): through smccc_set_retval()

I really have seen an issue caused by (2)->(3).

When the PV guest running without PV_TIME device supporting, the result
of the ARM_SMCCC_HV_PV_TIME_ST hypercall is expected to be -1 (which
means "not supported"), but the actual result I got is 4294967295.
Guest continues to run blindly, bad things would happen then...

I think this needs a fix?


Thanks,
zenghui
Steven Price Aug. 23, 2019, 1:23 p.m. UTC | #4
On 23/08/2019 13:07, Zenghui Yu wrote:
> Hi Steven,
> 
> Only one comment, at the bottom.
> 
> On 2019/8/21 23:36, Steven Price wrote:
>> Implement the service call for configuring a shared structure between a
>> VCPU and the hypervisor in which the hypervisor can write the time
>> stolen from the VCPU's execution time by other tasks on the host.
>>
>> The hypervisor allocates memory which is placed at an IPA chosen by user
>> space. The hypervisor then updates the shared structure using
>> kvm_put_guest() to ensure single copy atomicity of the 64-bit value
>> reporting the stolen time in nanoseconds.
>>
>> Whenever stolen time is enabled by the guest, the stolen time counter is
>> reset.
>>
>> The stolen time itself is retrieved from the sched_info structure
>> maintained by the Linux scheduler code. We enable SCHEDSTATS when
>> selecting KVM Kconfig to ensure this value is meaningful.
>>
>> Signed-off-by: Steven Price <steven.price@arm.com>
>> ---
>>   arch/arm/include/asm/kvm_host.h   | 20 +++++++++
>>   arch/arm64/include/asm/kvm_host.h | 25 +++++++++++-
>>   arch/arm64/kvm/Kconfig            |  1 +
>>   include/linux/kvm_types.h         |  2 +
>>   virt/kvm/arm/arm.c                | 10 +++++
>>   virt/kvm/arm/hypercalls.c         |  3 ++
>>   virt/kvm/arm/pvtime.c             | 67 +++++++++++++++++++++++++++++++
>>   7 files changed, 127 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/arm/include/asm/kvm_host.h
>> b/arch/arm/include/asm/kvm_host.h
>> index 369b5d2d54bf..47d2ced99421 100644
>> --- a/arch/arm/include/asm/kvm_host.h
>> +++ b/arch/arm/include/asm/kvm_host.h
>> @@ -39,6 +39,7 @@
>>       KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
>>   #define KVM_REQ_IRQ_PENDING    KVM_ARCH_REQ(1)
>>   #define KVM_REQ_VCPU_RESET    KVM_ARCH_REQ(2)
>> +#define KVM_REQ_RECORD_STEAL    KVM_ARCH_REQ(3)
>>     DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
>>   @@ -329,6 +330,25 @@ static inline int
>> kvm_hypercall_pv_features(struct kvm_vcpu *vcpu)
>>       return SMCCC_RET_NOT_SUPPORTED;
>>   }
>>   +static inline int kvm_hypercall_stolen_time(struct kvm_vcpu *vcpu)
>> +{
>> +    return SMCCC_RET_NOT_SUPPORTED;
>> +}
>> +
>> +static inline int kvm_update_stolen_time(struct kvm_vcpu *vcpu, bool
>> init)
>> +{
>> +    return -ENOTSUPP;
>> +}
>> +
>> +static inline void kvm_pvtime_init_vm(struct kvm_arch *kvm_arch)
>> +{
>> +}
>> +
>> +static inline bool kvm_is_pvtime_enabled(struct kvm_arch *kvm_arch)
>> +{
>> +    return false;
>> +}
>> +
>>   void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
>>     struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long
>> mpidr);
>> diff --git a/arch/arm64/include/asm/kvm_host.h
>> b/arch/arm64/include/asm/kvm_host.h
>> index 583b3639062a..b6fa7beffd8a 100644
>> --- a/arch/arm64/include/asm/kvm_host.h
>> +++ b/arch/arm64/include/asm/kvm_host.h
>> @@ -44,6 +44,7 @@
>>       KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
>>   #define KVM_REQ_IRQ_PENDING    KVM_ARCH_REQ(1)
>>   #define KVM_REQ_VCPU_RESET    KVM_ARCH_REQ(2)
>> +#define KVM_REQ_RECORD_STEAL    KVM_ARCH_REQ(3)
>>     DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
>>   @@ -83,6 +84,11 @@ struct kvm_arch {
>>         /* Mandated version of PSCI */
>>       u32 psci_version;
>> +
>> +    struct kvm_arch_pvtime {
>> +        gpa_t st_base;
>> +        u64 st_size;
>> +    } pvtime;
>>   };
>>     #define KVM_NR_MEM_OBJS     40
>> @@ -338,8 +344,13 @@ struct kvm_vcpu_arch {
>>       /* True when deferrable sysregs are loaded on the physical CPU,
>>        * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */
>>       bool sysregs_loaded_on_cpu;
>> -};
>>   +    /* Guest PV state */
>> +    struct {
>> +        u64 steal;
>> +        u64 last_steal;
>> +    } steal;
>> +};
>>   /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
>>   #define vcpu_sve_pffr(vcpu) ((void *)((char
>> *)((vcpu)->arch.sve_state) + \
>>                         sve_ffr_offset((vcpu)->arch.sve_max_vl)))
>> @@ -479,6 +490,18 @@ int kvm_perf_init(void);
>>   int kvm_perf_teardown(void);
>>     int kvm_hypercall_pv_features(struct kvm_vcpu *vcpu);
>> +int kvm_hypercall_stolen_time(struct kvm_vcpu *vcpu);
>> +int kvm_update_stolen_time(struct kvm_vcpu *vcpu, bool init);
>> +
>> +static inline void kvm_pvtime_init_vm(struct kvm_arch *kvm_arch)
>> +{
>> +    kvm_arch->pvtime.st_base = GPA_INVALID;
>> +}
>> +
>> +static inline bool kvm_is_pvtime_enabled(struct kvm_arch *kvm_arch)
>> +{
>> +    return (kvm_arch->pvtime.st_base != GPA_INVALID);
>> +}
>>     void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome);
>>   diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
>> index a67121d419a2..d8b88e40d223 100644
>> --- a/arch/arm64/kvm/Kconfig
>> +++ b/arch/arm64/kvm/Kconfig
>> @@ -39,6 +39,7 @@ config KVM
>>       select IRQ_BYPASS_MANAGER
>>       select HAVE_KVM_IRQ_BYPASS
>>       select HAVE_KVM_VCPU_RUN_PID_CHANGE
>> +    select SCHEDSTATS
>>       ---help---
>>         Support hosting virtualized guest machines.
>>         We don't support KVM with 16K page tables yet, due to the
>> multiple
>> diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
>> index bde5374ae021..1c88e69db3d9 100644
>> --- a/include/linux/kvm_types.h
>> +++ b/include/linux/kvm_types.h
>> @@ -35,6 +35,8 @@ typedef unsigned long  gva_t;
>>   typedef u64            gpa_t;
>>   typedef u64            gfn_t;
>>   +#define GPA_INVALID    (~(gpa_t)0)
>> +
>>   typedef unsigned long  hva_t;
>>   typedef u64            hpa_t;
>>   typedef u64            hfn_t;
>> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
>> index 35a069815baf..5e8343e2dd62 100644
>> --- a/virt/kvm/arm/arm.c
>> +++ b/virt/kvm/arm/arm.c
>> @@ -40,6 +40,10 @@
>>   #include <asm/kvm_coproc.h>
>>   #include <asm/sections.h>
>>   +#include <kvm/arm_hypercalls.h>
>> +#include <kvm/arm_pmu.h>
>> +#include <kvm/arm_psci.h>
>> +
>>   #ifdef REQUIRES_VIRT
>>   __asm__(".arch_extension    virt");
>>   #endif
>> @@ -135,6 +139,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned
>> long type)
>>       kvm->arch.max_vcpus = vgic_present ?
>>                   kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS;
>>   +    kvm_pvtime_init_vm(&kvm->arch);
>>       return ret;
>>   out_free_stage2_pgd:
>>       kvm_free_stage2_pgd(kvm);
>> @@ -379,6 +384,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int
>> cpu)
>>       kvm_vcpu_load_sysregs(vcpu);
>>       kvm_arch_vcpu_load_fp(vcpu);
>>       kvm_vcpu_pmu_restore_guest(vcpu);
>> +    if (kvm_is_pvtime_enabled(&vcpu->kvm->arch))
>> +        kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu);
>>         if (single_task_running())
>>           vcpu_clear_wfe_traps(vcpu);
>> @@ -644,6 +651,9 @@ static void check_vcpu_requests(struct kvm_vcpu
>> *vcpu)
>>            * that a VCPU sees new virtual interrupts.
>>            */
>>           kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu);
>> +
>> +        if (kvm_check_request(KVM_REQ_RECORD_STEAL, vcpu))
>> +            kvm_update_stolen_time(vcpu, false);
>>       }
>>   }
>>   diff --git a/virt/kvm/arm/hypercalls.c b/virt/kvm/arm/hypercalls.c
>> index 63ae629c466a..ac678eabf15f 100644
>> --- a/virt/kvm/arm/hypercalls.c
>> +++ b/virt/kvm/arm/hypercalls.c
>> @@ -56,6 +56,9 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
>>       case ARM_SMCCC_HV_PV_FEATURES:
>>           val = kvm_hypercall_pv_features(vcpu);
>>           break;
>> +    case ARM_SMCCC_HV_PV_TIME_ST:
>> +        val = kvm_hypercall_stolen_time(vcpu);
>> +        break;
>>       default:
>>           return kvm_psci_call(vcpu);
>>       }
>> diff --git a/virt/kvm/arm/pvtime.c b/virt/kvm/arm/pvtime.c
>> index 6201d71cb1f8..28603689f6e0 100644
>> --- a/virt/kvm/arm/pvtime.c
>> +++ b/virt/kvm/arm/pvtime.c
>> @@ -3,8 +3,51 @@
>>     #include <linux/arm-smccc.h>
>>   +#include <asm/pvclock-abi.h>
>> +
>>   #include <kvm/arm_hypercalls.h>
>>   +int kvm_update_stolen_time(struct kvm_vcpu *vcpu, bool init)
>> +{
>> +    struct kvm *kvm = vcpu->kvm;
>> +    struct kvm_arch_pvtime *pvtime = &kvm->arch.pvtime;
>> +    u64 steal;
>> +    u64 steal_le;
>> +    u64 offset;
>> +    int idx;
>> +    const int stride = sizeof(struct pvclock_vcpu_stolen_time);
>> +
>> +    if (pvtime->st_base == GPA_INVALID)
>> +        return -ENOTSUPP;
>> +
>> +    /* Let's do the local bookkeeping */
>> +    steal = vcpu->arch.steal.steal;
>> +    steal += current->sched_info.run_delay -
>> vcpu->arch.steal.last_steal;
>> +    vcpu->arch.steal.last_steal = current->sched_info.run_delay;
>> +    vcpu->arch.steal.steal = steal;
>> +
>> +    offset = stride * kvm_vcpu_get_idx(vcpu);
>> +
>> +    if (unlikely(offset + stride > pvtime->st_size))
>> +        return -EINVAL;
>> +
>> +    steal_le = cpu_to_le64(steal);
>> +    idx = srcu_read_lock(&kvm->srcu);
>> +    if (init) {
>> +        struct pvclock_vcpu_stolen_time init_values = {
>> +            .revision = 0,
>> +            .attributes = 0
>> +        };
>> +        kvm_write_guest(kvm, pvtime->st_base + offset, &init_values,
>> +                sizeof(init_values));
>> +    }
>> +    offset += offsetof(struct pvclock_vcpu_stolen_time, stolen_time);
>> +    kvm_put_guest(kvm, pvtime->st_base + offset, steal_le, u64);
>> +    srcu_read_unlock(&kvm->srcu, idx);
>> +
>> +    return 0;
>> +}
>> +
>>   int kvm_hypercall_pv_features(struct kvm_vcpu *vcpu)
>>   {
>>       u32 feature = smccc_get_arg1(vcpu);
>> @@ -12,6 +55,7 @@ int kvm_hypercall_pv_features(struct kvm_vcpu *vcpu)
>>         switch (feature) {
>>       case ARM_SMCCC_HV_PV_FEATURES:
>> +    case ARM_SMCCC_HV_PV_TIME_ST:
>>           val = SMCCC_RET_SUCCESS;
>>           break;
>>       }
>> @@ -19,3 +63,26 @@ int kvm_hypercall_pv_features(struct kvm_vcpu *vcpu)
>>       return val;
>>   }
>>   +int kvm_hypercall_stolen_time(struct kvm_vcpu *vcpu)
>> +{
>> +    u64 ret;
>> +    int err;
>> +
>> +    /*
>> +     * Start counting stolen time from the time the guest requests
>> +     * the feature enabled.
>> +     */
>> +    vcpu->arch.steal.steal = 0;
>> +    vcpu->arch.steal.last_steal = current->sched_info.run_delay;
>> +
>> +    err = kvm_update_stolen_time(vcpu, true);
>> +
>> +    if (err)
>> +        ret = SMCCC_RET_NOT_SUPPORTED;
>> +    else
>> +        ret = vcpu->kvm->arch.pvtime.st_base +
>> +            (sizeof(struct pvclock_vcpu_stolen_time) *
>> +             kvm_vcpu_get_idx(vcpu));
>> +
>> +    return ret;
> 
> The *type* of the 'ret' here looks a bit messy to me:
> (1)u64 -> (2)int -> (3)u32 -> (4)unsigned long
> 
> (1)->(2): just inside kvm_hypercall_stolen_time()
> (2)->(3): inside kvm_hvc_call_handler(), assign 'ret' to 'val'
> (3)->(4): through smccc_set_retval()
> 
> I really have seen an issue caused by (2)->(3).
> 
> When the PV guest running without PV_TIME device supporting, the result
> of the ARM_SMCCC_HV_PV_TIME_ST hypercall is expected to be -1 (which
> means "not supported"), but the actual result I got is 4294967295.
> Guest continues to run blindly, bad things would happen then...
> 
> I think this needs a fix?

Yes you are entirely right. I'm afraid this happened because I
refactored the functions and apparently forgot to update the return
type. In a previous version the functions themselves did
smccc_set_retval() themselves and the return value was always "1" (the
same as kvm_hvc_call_handler()).

The function should really return a "long" and "val" in
kvm_hvc_call_handler() should be upgraded to a "long" too - the
SMC64/HVC64 calling convention requires error codes to be 64-bit signed
integers.

Thanks for spotting this!

Steve
diff mbox series

Patch

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 369b5d2d54bf..47d2ced99421 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -39,6 +39,7 @@ 
 	KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_IRQ_PENDING	KVM_ARCH_REQ(1)
 #define KVM_REQ_VCPU_RESET	KVM_ARCH_REQ(2)
+#define KVM_REQ_RECORD_STEAL	KVM_ARCH_REQ(3)
 
 DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
 
@@ -329,6 +330,25 @@  static inline int kvm_hypercall_pv_features(struct kvm_vcpu *vcpu)
 	return SMCCC_RET_NOT_SUPPORTED;
 }
 
+static inline int kvm_hypercall_stolen_time(struct kvm_vcpu *vcpu)
+{
+	return SMCCC_RET_NOT_SUPPORTED;
+}
+
+static inline int kvm_update_stolen_time(struct kvm_vcpu *vcpu, bool init)
+{
+	return -ENOTSUPP;
+}
+
+static inline void kvm_pvtime_init_vm(struct kvm_arch *kvm_arch)
+{
+}
+
+static inline bool kvm_is_pvtime_enabled(struct kvm_arch *kvm_arch)
+{
+	return false;
+}
+
 void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
 
 struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 583b3639062a..b6fa7beffd8a 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -44,6 +44,7 @@ 
 	KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_IRQ_PENDING	KVM_ARCH_REQ(1)
 #define KVM_REQ_VCPU_RESET	KVM_ARCH_REQ(2)
+#define KVM_REQ_RECORD_STEAL	KVM_ARCH_REQ(3)
 
 DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
 
@@ -83,6 +84,11 @@  struct kvm_arch {
 
 	/* Mandated version of PSCI */
 	u32 psci_version;
+
+	struct kvm_arch_pvtime {
+		gpa_t st_base;
+		u64 st_size;
+	} pvtime;
 };
 
 #define KVM_NR_MEM_OBJS     40
@@ -338,8 +344,13 @@  struct kvm_vcpu_arch {
 	/* True when deferrable sysregs are loaded on the physical CPU,
 	 * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */
 	bool sysregs_loaded_on_cpu;
-};
 
+	/* Guest PV state */
+	struct {
+		u64 steal;
+		u64 last_steal;
+	} steal;
+};
 /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
 #define vcpu_sve_pffr(vcpu) ((void *)((char *)((vcpu)->arch.sve_state) + \
 				      sve_ffr_offset((vcpu)->arch.sve_max_vl)))
@@ -479,6 +490,18 @@  int kvm_perf_init(void);
 int kvm_perf_teardown(void);
 
 int kvm_hypercall_pv_features(struct kvm_vcpu *vcpu);
+int kvm_hypercall_stolen_time(struct kvm_vcpu *vcpu);
+int kvm_update_stolen_time(struct kvm_vcpu *vcpu, bool init);
+
+static inline void kvm_pvtime_init_vm(struct kvm_arch *kvm_arch)
+{
+	kvm_arch->pvtime.st_base = GPA_INVALID;
+}
+
+static inline bool kvm_is_pvtime_enabled(struct kvm_arch *kvm_arch)
+{
+	return (kvm_arch->pvtime.st_base != GPA_INVALID);
+}
 
 void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome);
 
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index a67121d419a2..d8b88e40d223 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -39,6 +39,7 @@  config KVM
 	select IRQ_BYPASS_MANAGER
 	select HAVE_KVM_IRQ_BYPASS
 	select HAVE_KVM_VCPU_RUN_PID_CHANGE
+	select SCHEDSTATS
 	---help---
 	  Support hosting virtualized guest machines.
 	  We don't support KVM with 16K page tables yet, due to the multiple
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index bde5374ae021..1c88e69db3d9 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -35,6 +35,8 @@  typedef unsigned long  gva_t;
 typedef u64            gpa_t;
 typedef u64            gfn_t;
 
+#define GPA_INVALID	(~(gpa_t)0)
+
 typedef unsigned long  hva_t;
 typedef u64            hpa_t;
 typedef u64            hfn_t;
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 35a069815baf..5e8343e2dd62 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -40,6 +40,10 @@ 
 #include <asm/kvm_coproc.h>
 #include <asm/sections.h>
 
+#include <kvm/arm_hypercalls.h>
+#include <kvm/arm_pmu.h>
+#include <kvm/arm_psci.h>
+
 #ifdef REQUIRES_VIRT
 __asm__(".arch_extension	virt");
 #endif
@@ -135,6 +139,7 @@  int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	kvm->arch.max_vcpus = vgic_present ?
 				kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS;
 
+	kvm_pvtime_init_vm(&kvm->arch);
 	return ret;
 out_free_stage2_pgd:
 	kvm_free_stage2_pgd(kvm);
@@ -379,6 +384,8 @@  void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	kvm_vcpu_load_sysregs(vcpu);
 	kvm_arch_vcpu_load_fp(vcpu);
 	kvm_vcpu_pmu_restore_guest(vcpu);
+	if (kvm_is_pvtime_enabled(&vcpu->kvm->arch))
+		kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu);
 
 	if (single_task_running())
 		vcpu_clear_wfe_traps(vcpu);
@@ -644,6 +651,9 @@  static void check_vcpu_requests(struct kvm_vcpu *vcpu)
 		 * that a VCPU sees new virtual interrupts.
 		 */
 		kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu);
+
+		if (kvm_check_request(KVM_REQ_RECORD_STEAL, vcpu))
+			kvm_update_stolen_time(vcpu, false);
 	}
 }
 
diff --git a/virt/kvm/arm/hypercalls.c b/virt/kvm/arm/hypercalls.c
index 63ae629c466a..ac678eabf15f 100644
--- a/virt/kvm/arm/hypercalls.c
+++ b/virt/kvm/arm/hypercalls.c
@@ -56,6 +56,9 @@  int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
 	case ARM_SMCCC_HV_PV_FEATURES:
 		val = kvm_hypercall_pv_features(vcpu);
 		break;
+	case ARM_SMCCC_HV_PV_TIME_ST:
+		val = kvm_hypercall_stolen_time(vcpu);
+		break;
 	default:
 		return kvm_psci_call(vcpu);
 	}
diff --git a/virt/kvm/arm/pvtime.c b/virt/kvm/arm/pvtime.c
index 6201d71cb1f8..28603689f6e0 100644
--- a/virt/kvm/arm/pvtime.c
+++ b/virt/kvm/arm/pvtime.c
@@ -3,8 +3,51 @@ 
 
 #include <linux/arm-smccc.h>
 
+#include <asm/pvclock-abi.h>
+
 #include <kvm/arm_hypercalls.h>
 
+int kvm_update_stolen_time(struct kvm_vcpu *vcpu, bool init)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_arch_pvtime *pvtime = &kvm->arch.pvtime;
+	u64 steal;
+	u64 steal_le;
+	u64 offset;
+	int idx;
+	const int stride = sizeof(struct pvclock_vcpu_stolen_time);
+
+	if (pvtime->st_base == GPA_INVALID)
+		return -ENOTSUPP;
+
+	/* Let's do the local bookkeeping */
+	steal = vcpu->arch.steal.steal;
+	steal += current->sched_info.run_delay - vcpu->arch.steal.last_steal;
+	vcpu->arch.steal.last_steal = current->sched_info.run_delay;
+	vcpu->arch.steal.steal = steal;
+
+	offset = stride * kvm_vcpu_get_idx(vcpu);
+
+	if (unlikely(offset + stride > pvtime->st_size))
+		return -EINVAL;
+
+	steal_le = cpu_to_le64(steal);
+	idx = srcu_read_lock(&kvm->srcu);
+	if (init) {
+		struct pvclock_vcpu_stolen_time init_values = {
+			.revision = 0,
+			.attributes = 0
+		};
+		kvm_write_guest(kvm, pvtime->st_base + offset, &init_values,
+				sizeof(init_values));
+	}
+	offset += offsetof(struct pvclock_vcpu_stolen_time, stolen_time);
+	kvm_put_guest(kvm, pvtime->st_base + offset, steal_le, u64);
+	srcu_read_unlock(&kvm->srcu, idx);
+
+	return 0;
+}
+
 int kvm_hypercall_pv_features(struct kvm_vcpu *vcpu)
 {
 	u32 feature = smccc_get_arg1(vcpu);
@@ -12,6 +55,7 @@  int kvm_hypercall_pv_features(struct kvm_vcpu *vcpu)
 
 	switch (feature) {
 	case ARM_SMCCC_HV_PV_FEATURES:
+	case ARM_SMCCC_HV_PV_TIME_ST:
 		val = SMCCC_RET_SUCCESS;
 		break;
 	}
@@ -19,3 +63,26 @@  int kvm_hypercall_pv_features(struct kvm_vcpu *vcpu)
 	return val;
 }
 
+int kvm_hypercall_stolen_time(struct kvm_vcpu *vcpu)
+{
+	u64 ret;
+	int err;
+
+	/*
+	 * Start counting stolen time from the time the guest requests
+	 * the feature enabled.
+	 */
+	vcpu->arch.steal.steal = 0;
+	vcpu->arch.steal.last_steal = current->sched_info.run_delay;
+
+	err = kvm_update_stolen_time(vcpu, true);
+
+	if (err)
+		ret = SMCCC_RET_NOT_SUPPORTED;
+	else
+		ret = vcpu->kvm->arch.pvtime.st_base +
+			(sizeof(struct pvclock_vcpu_stolen_time) *
+			 kvm_vcpu_get_idx(vcpu));
+
+	return ret;
+}