diff mbox series

[v5,09/13] KVM: arm64: Allow userspace to configure a vCPU's virtual offset

Message ID 20210729173300.181775-10-oupton@google.com (mailing list archive)
State New, archived
Headers show
Series KVM: Add idempotent controls for migrating system counter state | expand

Commit Message

Oliver Upton July 29, 2021, 5:32 p.m. UTC
Add a new vCPU attribute that allows userspace to directly manipulate
the virtual counter-timer offset. Exposing such an interface allows for
the precise migration of guest virtual counter-timers, as it is an
idempotent interface.

Uphold the existing behavior of writes to CNTVOFF_EL2 for this new
interface, wherein a write to a single vCPU is broadcasted to all vCPUs
within a VM.

Reviewed-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Oliver Upton <oupton@google.com>
---
 Documentation/virt/kvm/devices/vcpu.rst | 22 ++++++++
 arch/arm64/include/uapi/asm/kvm.h       |  1 +
 arch/arm64/kvm/arch_timer.c             | 68 ++++++++++++++++++++++++-
 3 files changed, 89 insertions(+), 2 deletions(-)

Comments

Marc Zyngier July 30, 2021, 10:12 a.m. UTC | #1
On Thu, 29 Jul 2021 18:32:56 +0100,
Oliver Upton <oupton@google.com> wrote:
> 
> Add a new vCPU attribute that allows userspace to directly manipulate
> the virtual counter-timer offset. Exposing such an interface allows for
> the precise migration of guest virtual counter-timers, as it is an
> indepotent interface.
> 
> Uphold the existing behavior of writes to CNTVOFF_EL2 for this new
> interface, wherein a write to a single vCPU is broadcasted to all vCPUs
> within a VM.
> 
> Reviewed-by: Andrew Jones <drjones@redhat.com>
> Signed-off-by: Oliver Upton <oupton@google.com>
> ---
>  Documentation/virt/kvm/devices/vcpu.rst | 22 ++++++++
>  arch/arm64/include/uapi/asm/kvm.h       |  1 +
>  arch/arm64/kvm/arch_timer.c             | 68 ++++++++++++++++++++++++-
>  3 files changed, 89 insertions(+), 2 deletions(-)
> 
> diff --git a/Documentation/virt/kvm/devices/vcpu.rst b/Documentation/virt/kvm/devices/vcpu.rst
> index 0f46f2588905..ecbab7adc602 100644
> --- a/Documentation/virt/kvm/devices/vcpu.rst
> +++ b/Documentation/virt/kvm/devices/vcpu.rst
> @@ -139,6 +139,28 @@ configured values on other VCPUs.  Userspace should configure the interrupt
>  numbers on at least one VCPU after creating all VCPUs and before running any
>  VCPUs.
>  
> +2.2. ATTRIBUTE: KVM_ARM_VCPU_TIMER_OFFSET_VTIMER
> +------------------------------------------------
> +
> +:Parameters: Pointer to a 64-bit unsigned counter-timer offset.
> +
> +Returns:
> +
> +	 ======= ======================================
> +	 -EFAULT Error reading/writing the provided
> +	 	 parameter address
> +	 -ENXIO  Attribute not supported
> +	 ======= ======================================
> +
> +Specifies the guest's virtual counter-timer offset from the host's
> +virtual counter. The guest's virtual counter is then derived by
> +the following equation:
> +
> +  guest_cntvct = host_cntvct - KVM_ARM_VCPU_TIMER_OFFSET_VTIMER

I still have a problem with this, especially as you later introduce a
physical timer offset. My gut feeling is that the virtual offset
should be relative to the physical counter *of the guest*, and not
that of the host. The physical offset should be the only one that is
relative to the host. Anything else should be derived from it.

If you don't set the ptimer offset, then the two definitions are
strictly identical. It will also match the definition of a
userspace-visible CNTVOFF_EL2 with NV, which is strictly relative to
the guest view of the physical counter.

> +
> +KVM does not allow the use of varying offset values for different vCPUs;
> +the last written offset value will be broadcasted to all vCPUs in a VM.
> +

Please document the effects of this attribute w.r.t. writing
CNTVCT_EL0 from userspace.

>  3. GROUP: KVM_ARM_VCPU_PVTIME_CTRL
>  ==================================
>  
> diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
> index b3edde68bc3e..008d0518d2b1 100644
> --- a/arch/arm64/include/uapi/asm/kvm.h
> +++ b/arch/arm64/include/uapi/asm/kvm.h
> @@ -365,6 +365,7 @@ struct kvm_arm_copy_mte_tags {
>  #define KVM_ARM_VCPU_TIMER_CTRL		1
>  #define   KVM_ARM_VCPU_TIMER_IRQ_VTIMER		0
>  #define   KVM_ARM_VCPU_TIMER_IRQ_PTIMER		1
> +#define   KVM_ARM_VCPU_TIMER_OFFSET_VTIMER	2
>  #define KVM_ARM_VCPU_PVTIME_CTRL	2
>  #define   KVM_ARM_VCPU_PVTIME_IPA	0
>  
> diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
> index 3df67c127489..d2b1b13af658 100644
> --- a/arch/arm64/kvm/arch_timer.c
> +++ b/arch/arm64/kvm/arch_timer.c
> @@ -1305,7 +1305,7 @@ static void set_timer_irqs(struct kvm *kvm, int vtimer_irq, int ptimer_irq)
>  	}
>  }
>  
> -int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
> +int kvm_arm_timer_set_attr_irq(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
>  {
>  	int __user *uaddr = (int __user *)(long)attr->addr;
>  	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
> @@ -1338,7 +1338,39 @@ int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
>  	return 0;
>  }
>  
> -int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
> +int kvm_arm_timer_set_attr_offset(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
> +{
> +	u64 __user *uaddr = (u64 __user *)(long)attr->addr;
> +	u64 offset;
> +
> +	if (get_user(offset, uaddr))
> +		return -EFAULT;
> +
> +	switch (attr->attr) {
> +	case KVM_ARM_VCPU_TIMER_OFFSET_VTIMER:
> +		update_vtimer_cntvoff(vcpu, offset);

Probably not a good idea if the timer is already enabled on any of the
CPUs (we probably already have that problem, so let's fix it once and
for all).

> +		break;
> +	default:
> +		return -ENXIO;
> +	}
> +
> +	return 0;
> +}
> +
> +int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
> +{
> +	switch (attr->attr) {
> +	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
> +	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
> +		return kvm_arm_timer_set_attr_irq(vcpu, attr);
> +	case KVM_ARM_VCPU_TIMER_OFFSET_VTIMER:
> +		return kvm_arm_timer_set_attr_offset(vcpu, attr);
> +	}
> +
> +	return -ENXIO;
> +}
> +
> +int kvm_arm_timer_get_attr_irq(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
>  {
>  	int __user *uaddr = (int __user *)(long)attr->addr;
>  	struct arch_timer_context *timer;
> @@ -1359,11 +1391,43 @@ int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
>  	return put_user(irq, uaddr);
>  }
>  
> +int kvm_arm_timer_get_attr_offset(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
> +{
> +	u64 __user *uaddr = (u64 __user *)(long)attr->addr;
> +	struct arch_timer_context *timer;
> +	u64 offset;
> +
> +	switch (attr->attr) {
> +	case KVM_ARM_VCPU_TIMER_OFFSET_VTIMER:
> +		timer = vcpu_vtimer(vcpu);
> +		break;
> +	default:
> +		return -ENXIO;

What is the rationale for retrieving this offset in the first place? I
don't dislike the symmetry, but we already have an architectural way
of getting it (read the counter registers).

> +	}
> +
> +	offset = timer_get_offset(timer);
> +	return put_user(offset, uaddr);
> +}
> +
> +int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
> +{
> +	switch (attr->attr) {
> +	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
> +	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
> +		return kvm_arm_timer_get_attr_irq(vcpu, attr);
> +	case KVM_ARM_VCPU_TIMER_OFFSET_VTIMER:
> +		return kvm_arm_timer_get_attr_offset(vcpu, attr);
> +	}
> +
> +	return -ENXIO;
> +}
> +
>  int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
>  {
>  	switch (attr->attr) {
>  	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
>  	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
> +	case KVM_ARM_VCPU_TIMER_OFFSET_VTIMER:
>  		return 0;
>  	}
>  

Thanks,

	M.
Oliver Upton Aug. 2, 2021, 11:27 p.m. UTC | #2
On Fri, Jul 30, 2021 at 3:12 AM Marc Zyngier <maz@kernel.org> wrote:
>
> On Thu, 29 Jul 2021 18:32:56 +0100,
> Oliver Upton <oupton@google.com> wrote:
> >
> > Add a new vCPU attribute that allows userspace to directly manipulate
> > the virtual counter-timer offset. Exposing such an interface allows for
> > the precise migration of guest virtual counter-timers, as it is an
> > indepotent interface.
> >
> > Uphold the existing behavior of writes to CNTVOFF_EL2 for this new
> > interface, wherein a write to a single vCPU is broadcasted to all vCPUs
> > within a VM.
> >
> > Reviewed-by: Andrew Jones <drjones@redhat.com>
> > Signed-off-by: Oliver Upton <oupton@google.com>
> > ---
> >  Documentation/virt/kvm/devices/vcpu.rst | 22 ++++++++
> >  arch/arm64/include/uapi/asm/kvm.h       |  1 +
> >  arch/arm64/kvm/arch_timer.c             | 68 ++++++++++++++++++++++++-
> >  3 files changed, 89 insertions(+), 2 deletions(-)
> >
> > diff --git a/Documentation/virt/kvm/devices/vcpu.rst b/Documentation/virt/kvm/devices/vcpu.rst
> > index 0f46f2588905..ecbab7adc602 100644
> > --- a/Documentation/virt/kvm/devices/vcpu.rst
> > +++ b/Documentation/virt/kvm/devices/vcpu.rst
> > @@ -139,6 +139,28 @@ configured values on other VCPUs.  Userspace should configure the interrupt
> >  numbers on at least one VCPU after creating all VCPUs and before running any
> >  VCPUs.
> >
> > +2.2. ATTRIBUTE: KVM_ARM_VCPU_TIMER_OFFSET_VTIMER
> > +------------------------------------------------
> > +
> > +:Parameters: Pointer to a 64-bit unsigned counter-timer offset.
> > +
> > +Returns:
> > +
> > +      ======= ======================================
> > +      -EFAULT Error reading/writing the provided
> > +              parameter address
> > +      -ENXIO  Attribute not supported
> > +      ======= ======================================
> > +
> > +Specifies the guest's virtual counter-timer offset from the host's
> > +virtual counter. The guest's virtual counter is then derived by
> > +the following equation:
> > +
> > +  guest_cntvct = host_cntvct - KVM_ARM_VCPU_TIMER_OFFSET_VTIMER
>
> I still have a problem with this, specially as you later introduce a
> physical timer offset. My gut feeling is that the virtual offset
> should be relative to the physical counter *of the guest*, and not
> that of the host. The physical offset should be the only one that is
> relative to the host. Anything else should be deriving from it.
>
> If you don't set the ptimer offset, then the two definitions are
> strictly identical. It will also match the definition of a
> userspace-visible CNTVOFF_EL2 with NV, which is strictly relative to
> the guest view of the physical counter.

Yeah, this sounds good to me. I very much like the idea of maintaining
exactly one offset from the host to the guest, so long as users are
fine with paying the cost of an emulated physical counter-timer on
non-ECV hosts. That said, a non-NV guest shouldn't be using the
physical counter in the first place.

>
> > +
> > +KVM does not allow the use of varying offset values for different vCPUs;
> > +the last written offset value will be broadcasted to all vCPUs in a VM.
> > +
>
> Please document the effects of this attribute w.r.t. writing
> CNTVCT_EL0 from userspace.
>

Good idea.

> > -int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
> > +int kvm_arm_timer_set_attr_offset(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
> > +{
> > +     u64 __user *uaddr = (u64 __user *)(long)attr->addr;
> > +     u64 offset;
> > +
> > +     if (get_user(offset, uaddr))
> > +             return -EFAULT;
> > +
> > +     switch (attr->attr) {
> > +     case KVM_ARM_VCPU_TIMER_OFFSET_VTIMER:
> > +             update_vtimer_cntvoff(vcpu, offset);
>
> Probably not a good idea if the timer is already enabled on any of the
> CPUs (we probably already have that problem, so let's fix it once and
> for all).

Hmm... would enforcing ordering on an existing UAPI cause any issues?
If I understand the suggestion correctly, we will refuse to
write the counter offset for a VM with an active timer.

If that is the case, then when we migrate a guest the VMM would have
to be very deliberate about the order in which it restores registers,
no?

> > +int kvm_arm_timer_get_attr_offset(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
> > +{
> > +     u64 __user *uaddr = (u64 __user *)(long)attr->addr;
> > +     struct arch_timer_context *timer;
> > +     u64 offset;
> > +
> > +     switch (attr->attr) {
> > +     case KVM_ARM_VCPU_TIMER_OFFSET_VTIMER:
> > +             timer = vcpu_vtimer(vcpu);
> > +             break;
> > +     default:
> > +             return -ENXIO;
>
> What is the rational for retrieving this offset the first place? I
> don't dislike the symmetry, but we already have an architectural way
> of getting it (read the counter registers).

I don't believe this is necessary any more.

The reason that I had exposed the virtual counter offset as a device
attribute was to separate VMM and guest manipulation of the virtual
counter. A VMM migrating an EL2 guest would likely want to adjust the
vtimer according to the difference in virtual counters between two
hosts without changing any guest-visible sysregs. However, if we go
with your suggestion above, the hypervisor would only ever need to
poke a physical offset attribute to make transparent changes to *both*
counters.

So, I suppose this is what I'm proposing: treat VMM writes to
CNTVOFF_EL2 the same as guest writes. For CNTPOFF_EL2, we do a special
dance; guest writes to CNTPOFF_EL2 will be visible in the register
_and_ change the value KVM writes to CNTPOFF_EL2 in hardware. Host
writes to a physical offset device attribute will cause KVM to change
the hardware value of CNTPOFF_EL2, but not update the guest-visible
register value. This way, a guest can be transparently migrated
between hosts with different counters.

>
> > +     }
> > +
> > +     offset = timer_get_offset(timer);
> > +     return put_user(offset, uaddr);
> > +}
> > +
> > +int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
> > +{
> > +     switch (attr->attr) {
> > +     case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
> > +     case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
> > +             return kvm_arm_timer_get_attr_irq(vcpu, attr);
> > +     case KVM_ARM_VCPU_TIMER_OFFSET_VTIMER:
> > +             return kvm_arm_timer_get_attr_offset(vcpu, attr);
> > +     }
> > +
> > +     return -ENXIO;
> > +}
> > +
> >  int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
> >  {
> >       switch (attr->attr) {
> >       case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
> >       case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
> > +     case KVM_ARM_VCPU_TIMER_OFFSET_VTIMER:
> >               return 0;
> >       }
> >
>
> Thanks,
>
>         M.
>
> --
> Without deviation from the norm, progress is not possible.
diff mbox series

Patch

diff --git a/Documentation/virt/kvm/devices/vcpu.rst b/Documentation/virt/kvm/devices/vcpu.rst
index 0f46f2588905..ecbab7adc602 100644
--- a/Documentation/virt/kvm/devices/vcpu.rst
+++ b/Documentation/virt/kvm/devices/vcpu.rst
@@ -139,6 +139,28 @@  configured values on other VCPUs.  Userspace should configure the interrupt
 numbers on at least one VCPU after creating all VCPUs and before running any
 VCPUs.
 
+2.2. ATTRIBUTE: KVM_ARM_VCPU_TIMER_OFFSET_VTIMER
+------------------------------------------------
+
+:Parameters: Pointer to a 64-bit unsigned counter-timer offset.
+
+Returns:
+
+	 ======= ======================================
+	 -EFAULT Error reading/writing the provided
+	 	 parameter address
+	 -ENXIO  Attribute not supported
+	 ======= ======================================
+
+Specifies the guest's virtual counter-timer offset from the host's
+virtual counter. The guest's virtual counter is then derived by
+the following equation:
+
+  guest_cntvct = host_cntvct - KVM_ARM_VCPU_TIMER_OFFSET_VTIMER
+
+KVM does not allow the use of varying offset values for different vCPUs;
+the last written offset value will be broadcasted to all vCPUs in a VM.
+
 3. GROUP: KVM_ARM_VCPU_PVTIME_CTRL
 ==================================
 
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index b3edde68bc3e..008d0518d2b1 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -365,6 +365,7 @@  struct kvm_arm_copy_mte_tags {
 #define KVM_ARM_VCPU_TIMER_CTRL		1
 #define   KVM_ARM_VCPU_TIMER_IRQ_VTIMER		0
 #define   KVM_ARM_VCPU_TIMER_IRQ_PTIMER		1
+#define   KVM_ARM_VCPU_TIMER_OFFSET_VTIMER	2
 #define KVM_ARM_VCPU_PVTIME_CTRL	2
 #define   KVM_ARM_VCPU_PVTIME_IPA	0
 
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 3df67c127489..d2b1b13af658 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -1305,7 +1305,7 @@  static void set_timer_irqs(struct kvm *kvm, int vtimer_irq, int ptimer_irq)
 	}
 }
 
-int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
+int kvm_arm_timer_set_attr_irq(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
 {
 	int __user *uaddr = (int __user *)(long)attr->addr;
 	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
@@ -1338,7 +1338,39 @@  int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
 	return 0;
 }
 
-int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
+int kvm_arm_timer_set_attr_offset(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
+{
+	u64 __user *uaddr = (u64 __user *)(long)attr->addr;
+	u64 offset;
+
+	if (get_user(offset, uaddr))
+		return -EFAULT;
+
+	switch (attr->attr) {
+	case KVM_ARM_VCPU_TIMER_OFFSET_VTIMER:
+		update_vtimer_cntvoff(vcpu, offset);
+		break;
+	default:
+		return -ENXIO;
+	}
+
+	return 0;
+}
+
+int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
+{
+	switch (attr->attr) {
+	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
+	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
+		return kvm_arm_timer_set_attr_irq(vcpu, attr);
+	case KVM_ARM_VCPU_TIMER_OFFSET_VTIMER:
+		return kvm_arm_timer_set_attr_offset(vcpu, attr);
+	}
+
+	return -ENXIO;
+}
+
+int kvm_arm_timer_get_attr_irq(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
 {
 	int __user *uaddr = (int __user *)(long)attr->addr;
 	struct arch_timer_context *timer;
@@ -1359,11 +1391,43 @@  int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
 	return put_user(irq, uaddr);
 }
 
+int kvm_arm_timer_get_attr_offset(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
+{
+	u64 __user *uaddr = (u64 __user *)(long)attr->addr;
+	struct arch_timer_context *timer;
+	u64 offset;
+
+	switch (attr->attr) {
+	case KVM_ARM_VCPU_TIMER_OFFSET_VTIMER:
+		timer = vcpu_vtimer(vcpu);
+		break;
+	default:
+		return -ENXIO;
+	}
+
+	offset = timer_get_offset(timer);
+	return put_user(offset, uaddr);
+}
+
+int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
+{
+	switch (attr->attr) {
+	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
+	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
+		return kvm_arm_timer_get_attr_irq(vcpu, attr);
+	case KVM_ARM_VCPU_TIMER_OFFSET_VTIMER:
+		return kvm_arm_timer_get_attr_offset(vcpu, attr);
+	}
+
+	return -ENXIO;
+}
+
 int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
 {
 	switch (attr->attr) {
 	case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
 	case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
+	case KVM_ARM_VCPU_TIMER_OFFSET_VTIMER:
 		return 0;
 	}